Cigar.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include "Cigar.h"
00021 #include "STLUtilities.h"
00022
00023
00024 const int32_t Cigar::INDEX_NA = -1;
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 void Cigar::getCigarString(std::string& cigarString) const
00037 {
00038 using namespace STLUtilities;
00039
00040 std::vector<CigarOperator>::const_iterator i;
00041
00042 cigarString.clear();
00043
00044
00045
00046 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00047 {
00048 cigarString << (*i).count << (*i).getChar();
00049 }
00050 }
00051
00052 void Cigar::getCigarString(String& cigarString) const
00053 {
00054 std::string cigar;
00055
00056 getCigarString(cigar);
00057
00058 cigarString = cigar.c_str();
00059
00060 return;
00061 }
00062
00063 void Cigar::getExpandedString(std::string &s) const
00064 {
00065 s = "";
00066
00067 std::vector<CigarOperator>::const_iterator i;
00068
00069
00070
00071
00072 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00073 {
00074 for (uint32_t j = 0; j<(*i).count; j++) s += (*i).getChar();
00075 }
00076 return;
00077 }
00078
00079
00080 bool Cigar::operator == (Cigar &rhs) const
00081 {
00082
00083 if (this->size() != rhs.size()) return false;
00084
00085 for (int i = 0; i < this->size(); i++)
00086 {
00087 if (cigarOperations[i]!=rhs.cigarOperations[i]) return false;
00088 }
00089 return true;
00090 }
00091
00092
00093
00094
00095 int Cigar::getExpectedQueryBaseCount() const
00096 {
00097 int matchCount = 0;
00098 std::vector<CigarOperator>::const_iterator i;
00099 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00100 {
00101 switch (i->operation)
00102 {
00103 case match:
00104 case mismatch:
00105 case softClip:
00106 case insert:
00107 matchCount += i->count;
00108 break;
00109 default:
00110
00111 break;
00112 }
00113 }
00114 return matchCount;
00115 }
00116
00117
00118
00119
00120 int Cigar::getExpectedReferenceBaseCount() const
00121 {
00122 int matchCount = 0;
00123 std::vector<CigarOperator>::const_iterator i;
00124 for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00125 {
00126 switch (i->operation)
00127 {
00128 case match:
00129 case mismatch:
00130 case del:
00131 case skip:
00132 matchCount += i->count;
00133 break;
00134 default:
00135
00136 break;
00137 }
00138 }
00139 return matchCount;
00140 }
00141
00142
00143
00144 int Cigar::getNumBeginClips() const
00145 {
00146 int numBeginClips = 0;
00147 for (unsigned int i = 0; i != cigarOperations.size(); i++)
00148 {
00149 if ((cigarOperations[i].operation == softClip) ||
00150 (cigarOperations[i].operation == hardClip))
00151 {
00152
00153 numBeginClips += cigarOperations[i].count;
00154 }
00155 else
00156 {
00157
00158 break;
00159 }
00160 }
00161 return(numBeginClips);
00162 }
00163
00164
00165
00166 int Cigar::getNumEndClips() const
00167 {
00168 int numEndClips = 0;
00169 for (int i = (cigarOperations.size() - 1); i >= 0; i--)
00170 {
00171 if ((cigarOperations[i].operation == softClip) ||
00172 (cigarOperations[i].operation == hardClip))
00173 {
00174
00175 numEndClips += cigarOperations[i].count;
00176 }
00177 else
00178 {
00179
00180 break;
00181 }
00182 }
00183 return(numEndClips);
00184 }
00185
00186
00187 int32_t Cigar::getRefOffset(int32_t queryIndex)
00188 {
00189
00190 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00191 {
00192 setQueryAndReferenceIndexes();
00193 }
00194 if ((queryIndex < 0) || ((uint32_t)queryIndex >= queryToRef.size()))
00195 {
00196 return(INDEX_NA);
00197 }
00198 return(queryToRef[queryIndex]);
00199 }
00200
00201
00202 int32_t Cigar::getQueryIndex(int32_t refOffset)
00203 {
00204
00205 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00206 {
00207 setQueryAndReferenceIndexes();
00208 }
00209 if ((refOffset < 0) || ((uint32_t)refOffset >= refToQuery.size()))
00210 {
00211 return(INDEX_NA);
00212 }
00213 return(refToQuery[refOffset]);
00214 }
00215
00216
00217 int32_t Cigar::getRefPosition(int32_t queryIndex, int32_t queryStartPos)
00218 {
00219
00220 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00221 {
00222 setQueryAndReferenceIndexes();
00223 }
00224 if ((queryIndex < 0) || ((uint32_t)queryIndex >= queryToRef.size()))
00225 {
00226 return(INDEX_NA);
00227 }
00228
00229 if (queryToRef[queryIndex] != INDEX_NA)
00230 {
00231 return(queryToRef[queryIndex] + queryStartPos);
00232 }
00233 return(INDEX_NA);
00234 }
00235
00236
00237
00238
00239
00240 int32_t Cigar::getQueryIndex(int32_t refPosition, int32_t queryStartPos)
00241 {
00242
00243 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00244 {
00245 setQueryAndReferenceIndexes();
00246 }
00247
00248 int32_t refOffset = refPosition - queryStartPos;
00249 if ((refOffset < 0) || ((uint32_t)refOffset >= refToQuery.size()))
00250 {
00251 return(INDEX_NA);
00252 }
00253
00254 return(refToQuery[refOffset]);
00255 }
00256
00257
00258 int32_t Cigar::getExpandedCigarIndexFromQueryIndex(int32_t queryIndex)
00259 {
00260
00261 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00262 {
00263 setQueryAndReferenceIndexes();
00264 }
00265 if ((queryIndex < 0) || ((uint32_t)queryIndex >= queryToCigar.size()))
00266 {
00267 return(INDEX_NA);
00268 }
00269 return(queryToCigar[queryIndex]);
00270 }
00271
00272
00273 int32_t Cigar::getExpandedCigarIndexFromRefOffset(int32_t refOffset)
00274 {
00275
00276 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00277 {
00278 setQueryAndReferenceIndexes();
00279 }
00280 if ((refOffset < 0) || ((uint32_t)refOffset >= refToCigar.size()))
00281 {
00282 return(INDEX_NA);
00283 }
00284 return(refToCigar[refOffset]);
00285 }
00286
00287
00288 int32_t Cigar::getExpandedCigarIndexFromRefPos(int32_t refPosition,
00289 int32_t queryStartPos)
00290 {
00291 return(getExpandedCigarIndexFromRefOffset(refPosition - queryStartPos));
00292 }
00293
00294
00295 char Cigar::getCigarCharOp(int32_t expandedCigarIndex)
00296 {
00297
00298 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00299 {
00300
00301 setQueryAndReferenceIndexes();
00302 }
00303
00304
00305 if((expandedCigarIndex < 0) ||
00306 ((uint32_t)expandedCigarIndex >= myExpandedCigar.length()))
00307 {
00308 return('?');
00309 }
00310 return(myExpandedCigar[expandedCigarIndex]);
00311 }
00312
00313
00314 char Cigar::getCigarCharOpFromQueryIndex(int32_t queryIndex)
00315 {
00316 return(getCigarCharOp(getExpandedCigarIndexFromQueryIndex(queryIndex)));
00317 }
00318
00319
00320 char Cigar::getCigarCharOpFromRefOffset(int32_t refOffset)
00321 {
00322 return(getCigarCharOp(getExpandedCigarIndexFromRefOffset(refOffset)));
00323 }
00324
00325
00326 char Cigar::getCigarCharOpFromRefPos(int32_t refPosition, int32_t queryStartPos)
00327 {
00328 return(getCigarCharOp(getExpandedCigarIndexFromRefPos(refPosition, queryStartPos)));
00329 }
00330
00331
00332
00333
00334 uint32_t Cigar::getNumOverlaps(int32_t start, int32_t end,
00335 int32_t queryStartPos)
00336 {
00337
00338 if ((queryToRef.size() == 0) || (refToQuery.size() == 0))
00339 {
00340 setQueryAndReferenceIndexes();
00341 }
00342
00343
00344 int32_t startRefOffset = 0;
00345
00346
00347
00348
00349 if (start > queryStartPos)
00350 {
00351 startRefOffset = start - queryStartPos;
00352 }
00353
00354 int32_t endRefOffset = end - queryStartPos;
00355 if (end == -1)
00356 {
00357
00358
00359
00360 endRefOffset = refToQuery.size();
00361 }
00362
00363
00364
00365
00366 if (endRefOffset < 0)
00367 {
00368 return(0);
00369 }
00370
00371
00372
00373
00374 int32_t refOffset = 0;
00375 int32_t numOverlaps = 0;
00376 for (unsigned int queryIndex = 0; queryIndex < queryToRef.size();
00377 queryIndex++)
00378 {
00379 refOffset = getRefOffset(queryIndex);
00380 if (refOffset > endRefOffset)
00381 {
00382
00383
00384 break;
00385 }
00386 else if ((refOffset >= startRefOffset) && (refOffset < endRefOffset))
00387 {
00388
00389 ++numOverlaps;
00390 }
00391 }
00392
00393 return(numOverlaps);
00394 }
00395
00396
00397
00398 bool Cigar::hasIndel()
00399 {
00400 for(unsigned int i = 0; i < cigarOperations.size(); i++)
00401 {
00402 if((cigarOperations[i].operation == insert) ||
00403 (cigarOperations[i].operation == del))
00404 {
00405
00406 return(true);
00407 }
00408 }
00409
00410 return(false);
00411 }
00412
00413
00414
00415 void Cigar::clearQueryAndReferenceIndexes()
00416 {
00417 queryToRef.clear();
00418 refToQuery.clear();
00419 refToCigar.clear();
00420 queryToCigar.clear();
00421 myExpandedCigar.clear();
00422 }
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441 void Cigar::setQueryAndReferenceIndexes()
00442 {
00443
00444 clearQueryAndReferenceIndexes();
00445
00446 int extPos = 0;
00447
00448
00449 for (uint32_t cigarIndex = 0; cigarIndex < cigarOperations.size(); cigarIndex++)
00450 {
00451
00452 switch (cigarOperations[cigarIndex].operation)
00453 {
00454 case match:
00455 case mismatch:
00456
00457
00458 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00459 {
00460
00461
00462 int32_t queryToRefLen = queryToRef.size();
00463 int32_t refToQueryLen = refToQuery.size();
00464 queryToRef.push_back(refToQueryLen);
00465 refToQuery.push_back(queryToRefLen);
00466 refToCigar.push_back(extPos);
00467 queryToCigar.push_back(extPos++);
00468 myExpandedCigar.push_back(cigarOperations[cigarIndex].getChar());
00469 }
00470 break;
00471 case insert:
00472 case softClip:
00473
00474
00475 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00476 {
00477 queryToRef.push_back(INDEX_NA);
00478 queryToCigar.push_back(extPos++);
00479 myExpandedCigar.push_back(cigarOperations[cigarIndex].getChar());
00480 }
00481 break;
00482 case del:
00483 case skip:
00484
00485
00486 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00487 {
00488 refToQuery.push_back(INDEX_NA);
00489 refToCigar.push_back(extPos++);
00490 myExpandedCigar.push_back(cigarOperations[cigarIndex].getChar());
00491 }
00492 break;
00493 case hardClip:
00494 case pad:
00495 case none:
00496 for (uint32_t i = 0; i < cigarOperations[cigarIndex].count; i++)
00497 {
00498 myExpandedCigar.push_back(cigarOperations[cigarIndex].getChar());
00499 ++extPos;
00500 }
00501 break;
00502 };
00503 }
00504 }
00505