libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2011 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 ////////////////////////////////////////////////////////////////////////// 00019 00020 #include "CigarHelper.h" 00021 00022 // Soft Clip from the beginning of the read to the specified reference position. 00023 int32_t CigarHelper::softClipBeginByRefPos(SamRecord& record, 00024 int32_t refPosition0Based, 00025 CigarRoller& newCigar, 00026 int32_t &new0BasedPosition) 00027 { 00028 newCigar.clear(); 00029 Cigar* cigar = record.getCigarInfo(); 00030 if(cigar == NULL) 00031 { 00032 // Failed to get the cigar. 00033 ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); 00034 return(NO_CLIP); 00035 } 00036 00037 // No cigar or position in the record, so return no clip. 00038 if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) 00039 { 00040 return(NO_CLIP); 00041 } 00042 00043 // Check to see if the reference position occurs before the record starts, 00044 // if it does, do no clipping. 00045 if(refPosition0Based < record.get0BasedPosition()) 00046 { 00047 // Not within this read, so nothing to clip. 00048 newCigar.Set(record.getCigar()); 00049 return(NO_CLIP); 00050 } 00051 00052 // The position falls after the read starts, so loop through until the 00053 // position or the end of the read is found. 00054 int32_t readClipPosition = 0; 00055 bool clipWritten = false; 00056 new0BasedPosition = record.get0BasedPosition(); 00057 for(int i = 0; i < cigar->size(); i++) 00058 { 00059 const Cigar::CigarOperator* op = &(cigar->getOperator(i)); 00060 00061 if(clipWritten) 00062 { 00063 // Clip point has been found, so just add everything. 00064 newCigar += *op; 00065 // Go to the next operation. 00066 continue; 00067 } 00068 00069 // The clip point has not yet been found, so check to see if we found 00070 // it now. 00071 00072 // Not a clip, check to see if the operation is found in the 00073 // reference. 00074 if(Cigar::foundInReference(*op)) 00075 { 00076 // match, mismatch, deletion, skip 00077 00078 // increment the current reference position to just past this 00079 // operation. 00080 new0BasedPosition += op->count; 00081 00082 // Check to see if this is also in the query, because otherwise 00083 // the operation is still being consumed. 00084 if(Cigar::foundInQuery(*op)) 00085 { 00086 // Also in the query, determine if the entire thing should 00087 // be clipped or just part of it. 00088 00089 uint32_t numKeep = 0; 00090 // Check to see if we have hit our clip position. 00091 if(refPosition0Based < new0BasedPosition) 00092 { 00093 // The specified clip position is in this cigar operation. 00094 numKeep = new0BasedPosition - refPosition0Based - 1; 00095 00096 if(numKeep > op->count) 00097 { 00098 // Keep the entire read. This happens because 00099 // we keep reading until the first match/mismatch 00100 // after the clip. 00101 numKeep = op->count; 00102 } 00103 } 00104 00105 // Add the part of this operation that is being clipped 00106 // to the clip count. 00107 readClipPosition += (op->count - numKeep); 00108 00109 // Only write the clip if we found a match/mismatch 00110 // to write. Otherwise we will keep accumulating clips 00111 // for the case of insertions. 00112 if(numKeep > 0) 00113 { 00114 new0BasedPosition -= numKeep; 00115 00116 newCigar.Add(Cigar::softClip, readClipPosition); 00117 00118 // Add the clipped part of this cigar to the clip 00119 // position. 00120 newCigar.Add(op->operation, numKeep); 00121 00122 // Found a match after the clip point, so stop 00123 // consuming cigar operations. 00124 clipWritten = true; 00125 continue; 00126 } 00127 } 00128 } 00129 else 00130 { 00131 // Only add hard clips. The softclips will be added in 00132 // when the total number is found. 00133 if(op->operation == Cigar::hardClip) 00134 { 00135 // Check if this is the first operation, if so, just write it. 00136 if(i == 0) 00137 { 00138 newCigar += *op; 00139 } 00140 // Check if it is the last operation (otherwise skip it). 00141 else if(i == (cigar->size() - 1)) 00142 { 00143 // Check whether or not the clip was ever written, and if 00144 // not, write it. 00145 if(clipWritten == false) 00146 { 00147 newCigar.Add(Cigar::softClip, readClipPosition); 00148 // Since no match/mismatch was ever found, set 00149 // the new ref position to the original one. 00150 new0BasedPosition = record.get0BasedPosition(); 00151 clipWritten = true; 00152 } 00153 // Add the hard clip. 00154 newCigar += *op; 00155 } 00156 } 00157 // Not yet to the clip position, so do not add this operation. 00158 if(Cigar::foundInQuery(*op)) 00159 { 00160 // Found in the query, so update the read clip position. 00161 readClipPosition += op->count; 00162 } 00163 } 00164 } // End loop through cigar. 00165 00166 00167 // Check whether or not the clip was ever written, and if 00168 // not, write it. 00169 if(clipWritten == false) 00170 { 00171 newCigar.Add(Cigar::softClip, readClipPosition); 00172 // Since no match/mismatch was ever found, set 00173 // the new ref position to the original one. 00174 new0BasedPosition = record.get0BasedPosition(); 00175 } 00176 00177 // Subtract 1 since readClipPosition atually contains the first 0based 00178 // position that is not clipped. 00179 return(readClipPosition - 1); 00180 } 00181 00182 00183 // Soft Clip from the end of the read at the specified reference position. 00184 int32_t CigarHelper::softClipEndByRefPos(SamRecord& record, 00185 int32_t refPosition0Based, 00186 CigarRoller& newCigar) 00187 { 00188 newCigar.clear(); 00189 Cigar* cigar = record.getCigarInfo(); 00190 if(cigar == NULL) 00191 { 00192 // Failed to get the cigar. 00193 ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); 00194 return(NO_CLIP); 00195 } 00196 00197 // No cigar or position in the record, so return no clip. 00198 if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) 00199 { 00200 return(NO_CLIP); 00201 } 00202 00203 // Check to see if the reference position occurs after the record ends, 00204 // if so, do no clipping. 00205 if(refPosition0Based > record.get0BasedAlignmentEnd()) 00206 { 00207 // Not within this read, so nothing to clip. 00208 newCigar.Set(record.getCigar()); 00209 return(NO_CLIP); 00210 } 00211 00212 // The position falls before the read ends, so loop through until the 00213 // position is found. 00214 int32_t currentRefPosition = record.get0BasedPosition(); 00215 int32_t readClipPosition = 0; 00216 for(int i = 0; i < cigar->size(); i++) 00217 { 00218 const Cigar::CigarOperator* op = &(cigar->getOperator(i)); 00219 00220 // If the operation is found in the reference, increase the 00221 // reference position. 00222 if(Cigar::foundInReference(*op)) 00223 { 00224 // match, mismatch, deletion, skip 00225 // increment the current reference position to just past 00226 // this operation. 00227 currentRefPosition += op->count; 00228 } 00229 00230 // Check to see if we have hit our clip position. 00231 if(refPosition0Based < currentRefPosition) 00232 { 00233 // If this read is also in the query (match/mismatch), 00234 // write the partial op to the new cigar. 00235 int32_t numKeep = 0; 00236 if(Cigar::foundInQuery(*op)) 00237 { 00238 numKeep = op->count - (currentRefPosition - refPosition0Based); 00239 if(numKeep > 0) 00240 { 00241 newCigar.Add(op->operation, numKeep); 00242 readClipPosition += numKeep; 00243 } 00244 } 00245 else if(Cigar::isClip(*op)) 00246 { 00247 // This is a hard clip, so write it. 00248 newCigar.Add(op->operation, op->count); 00249 } 00250 else 00251 { 00252 00253 // Not found in the query (skip/deletion), 00254 // so don't write any of the operation. 00255 } 00256 // Found the clip point, so break. 00257 break; 00258 } 00259 else if(refPosition0Based == currentRefPosition) 00260 { 00261 newCigar += *op; 00262 if(Cigar::foundInQuery(*op)) 00263 { 00264 readClipPosition += op->count; 00265 } 00266 } 00267 else 00268 { 00269 // Not yet to the clip position, so add this operation/size to 00270 // the new cigar. 00271 newCigar += *op; 00272 if(Cigar::foundInQuery(*op)) 00273 { 00274 // Found in the query, so update the read clip position. 00275 readClipPosition += op->count; 00276 } 00277 } 00278 } // End loop through cigar. 00279 00280 // Before adding the softclip, read from the end of the cigar checking to 00281 // see if the operations are in the query, removing operations that are 00282 // not (pad/delete/skip) until a hardclip or an operation in the query is 00283 // found. We do not want a pad/delete/skip right before a softclip. 00284 for(int j = newCigar.size() - 1; j >= 0; j--) 00285 { 00286 const Cigar::CigarOperator* op = &(newCigar.getOperator(j)); 00287 if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op)) 00288 { 00289 // pad/delete/skip 00290 newCigar.Remove(j); 00291 } 00292 else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op)) 00293 { 00294 // Soft clip, so increment the clip position for the return value. 00295 // Remove the softclip since the readClipPosition is used to 00296 // calculate teh size of the soft clip added. 00297 readClipPosition -= op->count; 00298 newCigar.Remove(j); 00299 } 00300 else 00301 { 00302 // Found a cigar operation that should not be deleted, so stop deleting. 00303 break; 00304 } 00305 } 00306 00307 // Determine the number of soft clips. 00308 int32_t numSoftClips = record.getReadLength() - readClipPosition; 00309 // NOTE that if the previous operation is a softclip, the CigarRoller logic 00310 // will merge this with that one. 00311 newCigar.Add(Cigar::softClip, numSoftClips); 00312 00313 // Check if an ending hard clip needs to be added. 00314 if(cigar->size() != 0) 00315 { 00316 const Cigar::CigarOperator* lastOp = 00317 &(cigar->getOperator(cigar->size() - 1)); 00318 if(lastOp->operation == Cigar::hardClip) 00319 { 00320 newCigar += *lastOp; 00321 } 00322 } 00323 00324 return(readClipPosition); 00325 }