Cigar.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include "Cigar.h"
00021 #include "STLUtilities.h"
00022
00023
00024 const int32_t Cigar::INDEX_NA = -1;
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 void Cigar::getCigarString(std::string& cigarString) const
00037 {
00038 using namespace STLUtilities;
00039
00040 std::vector<CigarOperator>::const_iterator i;
00041
00042 cigarString.clear();
00043
00044
00045
00046 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00047 {
00048 cigarString << (*i).count << (*i).getChar();
00049 }
00050 }
00051
00052 void Cigar::getCigarString(String& cigarString) const
00053 {
00054 std::string cigar;
00055
00056 getCigarString(cigar);
00057
00058 cigarString = cigar.c_str();
00059
00060 return;
00061 }
00062
00063 void Cigar::getExpandedString(std::string &s) const
00064 {
00065 s = "";
00066
00067 std::vector<CigarOperator>::const_iterator i;
00068
00069
00070
00071
00072 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00073 {
00074 for (uint32_t j = 0; j<(*i).count; j++) s += (*i).getChar();
00075 }
00076 return;
00077 }
00078
00079
00080 bool Cigar::operator == (Cigar &rhs) const
00081 {
00082
00083 if (this->size() != rhs.size()) return false;
00084
00085 for (int i = 0; i < this->size(); i++)
00086 {
00087 if (cigarOperations[i]!=rhs.cigarOperations[i]) return false;
00088 }
00089 return true;
00090 }
00091
00092
00093
00094
00095 int Cigar::getExpectedQueryBaseCount() const
00096 {
00097 int matchCount = 0;
00098 std::vector<CigarOperator>::const_iterator i;
00099 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00100 {
00101 switch (i->operation)
00102 {
00103 case match:
00104 case mismatch:
00105 case softClip:
00106 case insert:
00107 matchCount += i->count;
00108 break;
00109 default:
00110
00111 break;
00112 }
00113 }
00114 return matchCount;
00115 }
00116
00117
00118
00119
00120 int Cigar::getExpectedReferenceBaseCount() const
00121 {
00122 int matchCount = 0;
00123 std::vector<CigarOperator>::const_iterator i;
00124 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00125 {
00126 switch (i->operation)
00127 {
00128 case match:
00129 case mismatch:
00130 case del:
00131 case skip:
00132 matchCount += i->count;
00133 break;
00134 default:
00135
00136 break;
00137 }
00138 }
00139 return matchCount;
00140 }
00141
00142
00143
00144 int Cigar::getNumBeginClips() const
00145 {
00146 int numBeginClips = 0;
00147 for (unsigned int i = 0; i != cigarOperations.size(); i++)
00148 {
00149 if ((cigarOperations[i].operation == softClip) ||
00150 (cigarOperations[i].operation == hardClip))
00151 {
00152
00153 numBeginClips += cigarOperations[i].count;
00154 }
00155 else
00156 {
00157
00158 break;
00159 }
00160 }
00161 return(numBeginClips);
00162 }
00163
00164
00165
00166 int Cigar::getNumEndClips() const
00167 {
00168 int numEndClips = 0;
00169 for (int i = (cigarOperations.size() - 1); i >= 0; i--)
00170 {
00171 if ((cigarOperations[i].operation == softClip) ||
00172 (cigarOperations[i].operation == hardClip))
00173 {
00174
00175 numEndClips += cigarOperations[i].count;
00176 }
00177 else
00178 {
00179
00180 break;
00181 }
00182 }
00183 return(numEndClips);
00184 }
00185
00186
00187 int32_t Cigar::getRefOffset(int32_t queryIndex)
00188 {
00189
00190 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00191 {
00192 setQueryAndReferenceIndexes();
00193 }
00194 if ((queryIndex < 0) || ((uint32_t)queryIndex >= queryToRef.size()))
00195 {
00196 return(INDEX_NA);
00197 }
00198 return(queryToRef[queryIndex]);
00199 }
00200
00201
00202 int32_t Cigar::getQueryIndex(int32_t refOffset)
00203 {
00204
00205 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00206 {
00207 setQueryAndReferenceIndexes();
00208 }
00209 if ((refOffset < 0) || ((uint32_t)refOffset >= refToQuery.size()))
00210 {
00211 return(INDEX_NA);
00212 }
00213 return(refToQuery[refOffset]);
00214 }
00215
00216
00217 int32_t Cigar::getRefPosition(int32_t queryIndex, int32_t queryStartPos)
00218 {
00219
00220 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00221 {
00222 setQueryAndReferenceIndexes();
00223 }
00224 if ((queryIndex < 0) || ((uint32_t)queryIndex >= queryToRef.size()))
00225 {
00226 return(INDEX_NA);
00227 }
00228
00229 if (queryToRef[queryIndex] != INDEX_NA)
00230 {
00231 return(queryToRef[queryIndex] + queryStartPos);
00232 }
00233 return(INDEX_NA);
00234 }
00235
00236
00237
00238
00239
00240 int32_t Cigar::getQueryIndex(int32_t refPosition, int32_t queryStartPos)
00241 {
00242
00243 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00244 {
00245 setQueryAndReferenceIndexes();
00246 }
00247
00248 int32_t refOffset = refPosition - queryStartPos;
00249 if ((refOffset < 0) || ((uint32_t)refOffset >= refToQuery.size()))
00250 {
00251 return(INDEX_NA);
00252 }
00253
00254 return(refToQuery[refOffset]);
00255 }
00256
00257
00258
00259
00260 uint32_t Cigar::getNumOverlaps(int32_t start, int32_t end,
00261 int32_t queryStartPos)
00262 {
00263
00264 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00265 {
00266 setQueryAndReferenceIndexes();
00267 }
00268
00269
00270 int32_t startRefOffset = 0;
00271
00272
00273
00274
00275 if (start > queryStartPos)
00276 {
00277 startRefOffset = start - queryStartPos;
00278 }
00279
00280 int32_t endRefOffset = end - queryStartPos;
00281 if (end == -1)
00282 {
00283
00284
00285
00286 endRefOffset = refToQuery.size();
00287 }
00288
00289
00290
00291
00292 if (endRefOffset < 0)
00293 {
00294 return(0);
00295 }
00296
00297
00298
00299
00300 int32_t refOffset = 0;
00301 int32_t numOverlaps = 0;
00302 for (unsigned int queryIndex = 0; queryIndex < queryToRef.size();
00303 queryIndex++)
00304 {
00305 refOffset = getRefOffset(queryIndex);
00306 if (refOffset > endRefOffset)
00307 {
00308
00309
00310 break;
00311 }
00312 else if ((refOffset >= startRefOffset) && (refOffset < endRefOffset))
00313 {
00314
00315 ++numOverlaps;
00316 }
00317 }
00318
00319 return(numOverlaps);
00320 }
00321
00322
00323
00324 bool Cigar::hasIndel()
00325 {
00326 for(unsigned int i = 0; i < cigarOperations.size(); i++)
00327 {
00328 if((cigarOperations[i].operation == insert) ||
00329 (cigarOperations[i].operation == del))
00330 {
00331
00332 return(true);
00333 }
00334 }
00335
00336 return(false);
00337 }
00338
00339
00340
00341 void Cigar::clearQueryAndReferenceIndexes()
00342 {
00343 queryToRef.clear();
00344 refToQuery.clear();
00345 }
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364 void Cigar::setQueryAndReferenceIndexes()
00365 {
00366
00367 clearQueryAndReferenceIndexes();
00368
00369
00370 for (uint32_t cigarIndex = 0; cigarIndex < cigarOperations.size(); cigarIndex++)
00371 {
00372
00373 switch (cigarOperations[cigarIndex].operation)
00374 {
00375 case match:
00376 case mismatch:
00377
00378
00379 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00380 {
00381
00382
00383 int32_t queryToRefLen = queryToRef.size();
00384 int32_t refToQueryLen = refToQuery.size();
00385 queryToRef.push_back(refToQueryLen);
00386 refToQuery.push_back(queryToRefLen);
00387 }
00388 break;
00389 case insert:
00390 case softClip:
00391
00392
00393 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00394 {
00395 queryToRef.push_back(INDEX_NA);
00396 }
00397 break;
00398 case del:
00399 case skip:
00400
00401
00402 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00403 {
00404 refToQuery.push_back(INDEX_NA);
00405 }
00406 break;
00407 case hardClip:
00408 case pad:
00409 case none:
00410 break;
00411 };
00412 }
00413 }
00414