00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __PILEUP_H__
00019 #define __PILEUP_H__
00020
00021 #include <stdexcept>
00022 #include "SamFile.h"
00023 #include "PosList.h"
00024
00025
00026 class PileupHelper
00027 {
00028 public:
00029 static const int DEFAULT_WINDOW_SIZE = 1024;
00030 };
00031
00032 template <class PILEUP_TYPE>
00033 class defaultPileup
00034 {
00035 public:
00036 bool operator() (PILEUP_TYPE& element)
00037 {
00038 element.analyze();
00039 return(true);
00040 }
00041 void analyze(PILEUP_TYPE element)
00042 {
00043 element.analyze();
00044 }
00045 };
00046
00047 template <class PILEUP_TYPE>
00048 void defaultPileupAnalyze(PILEUP_TYPE& element)
00049 {
00050 element.analyze();
00051 }
00052
00053
00054
00055
00056 template <class PILEUP_TYPE,
00057 class FUNC_CLASS = defaultPileup<PILEUP_TYPE> >
00058 class Pileup
00059 {
00060 public:
00061
00062 Pileup(const FUNC_CLASS& fp = FUNC_CLASS());
00063
00064
00065
00066
00067
00068 Pileup(int window, const FUNC_CLASS& fp = FUNC_CLASS());
00069
00070
00071 Pileup(const std::string& refSeqFileName,
00072 const FUNC_CLASS& fp = FUNC_CLASS());
00073
00074
00075 Pileup(int window, const std::string& refSeqFileName,
00076 const FUNC_CLASS& fp = FUNC_CLASS());
00077
00078
00079 virtual ~Pileup();
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093 virtual int processFile(const std::string& fileName,
00094 uint16_t excludeFlag = 0x0704,
00095 uint16_t includeFlag = 0);
00096
00097
00098 virtual void processAlignment(SamRecord& record);
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109 virtual void processAlignmentRegion(SamRecord& record,
00110 int startPos,
00111 int endPos,
00112 PosList* excludeList = NULL);
00113
00114
00115
00116 void flushPileup();
00117
00118 protected:
00119 FUNC_CLASS myAnalyzeFuncPtr;
00120
00121
00122 void addAlignmentPosition(int refPosition, SamRecord& record);
00123
00124
00125 virtual void flushPileup(int refID, int refPosition);
00126 void flushPileup(int refPosition);
00127
00128
00129
00130
00131 int pileupPosition(int refPosition);
00132
00133 virtual void resetElement(PILEUP_TYPE& element, int position);
00134 virtual void addElement(PILEUP_TYPE& element, SamRecord& record);
00135 virtual void analyzeElement(PILEUP_TYPE& element);
00136 virtual void analyzeHead();
00137
00138 std::vector<PILEUP_TYPE> myElements;
00139
00140 int pileupStart;
00141 int pileupHead;
00142 int pileupTail;
00143 int pileupWindow;
00144
00145 int myCurrentRefID;
00146
00147 GenomeSequence* myRefPtr;
00148 };
00149
00150
00151 template <class PILEUP_TYPE, class FUNC_CLASS>
00152 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const FUNC_CLASS& fp)
00153 : myAnalyzeFuncPtr(fp),
00154 myElements(),
00155 pileupStart(0),
00156 pileupHead(0),
00157 pileupTail(-1),
00158 pileupWindow(PileupHelper::DEFAULT_WINDOW_SIZE),
00159 myCurrentRefID(-2),
00160 myRefPtr(NULL)
00161 {
00162
00163 myElements.resize(pileupWindow);
00164 }
00165
00166
00167 template <class PILEUP_TYPE, class FUNC_CLASS>
00168 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const FUNC_CLASS& fp)
00169 : myAnalyzeFuncPtr(fp),
00170 myElements(),
00171 pileupStart(0),
00172 pileupHead(0),
00173 pileupTail(-1),
00174 pileupWindow(window),
00175 myCurrentRefID(-2),
00176 myRefPtr(NULL)
00177 {
00178
00179 myElements.resize(window);
00180 }
00181
00182
00183 template <class PILEUP_TYPE, class FUNC_CLASS>
00184 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const std::string& refSeqFileName, const FUNC_CLASS& fp)
00185 : myAnalyzeFuncPtr(fp),
00186 myElements(),
00187 pileupStart(0),
00188 pileupHead(0),
00189 pileupTail(-1),
00190 pileupWindow(PileupHelper::DEFAULT_WINDOW_SIZE),
00191 myCurrentRefID(-2),
00192 myRefPtr(NULL)
00193 {
00194 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00195
00196
00197 myElements.resize(pileupWindow);
00198
00199 PILEUP_TYPE::setReference(myRefPtr);
00200 }
00201
00202
00203 template <class PILEUP_TYPE, class FUNC_CLASS>
00204 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const std::string& refSeqFileName, const FUNC_CLASS& fp)
00205 : myAnalyzeFuncPtr(fp),
00206 myElements(),
00207 pileupStart(0),
00208 pileupHead(0),
00209 pileupTail(-1),
00210 pileupWindow(window),
00211 myCurrentRefID(-2),
00212 myRefPtr(NULL)
00213 {
00214 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00215
00216
00217 myElements.resize(window);
00218
00219 PILEUP_TYPE::setReference(myRefPtr);
00220 }
00221
00222
00223 template <class PILEUP_TYPE, class FUNC_CLASS>
00224 Pileup<PILEUP_TYPE, FUNC_CLASS>::~Pileup()
00225 {
00226 flushPileup();
00227 if(myRefPtr != NULL)
00228 {
00229 delete myRefPtr;
00230 myRefPtr = NULL;
00231 }
00232 }
00233
00234 template <class PILEUP_TYPE, class FUNC_CLASS>
00235 int Pileup<PILEUP_TYPE, FUNC_CLASS>::processFile(const std::string& fileName,
00236 uint16_t excludeFlag,
00237 uint16_t includeFlag)
00238 {
00239 SamFile samIn;
00240 SamFileHeader header;
00241 SamRecord record;
00242
00243 if(myRefPtr != NULL)
00244 {
00245 samIn.SetReference(myRefPtr);
00246 }
00247
00248 if(!samIn.OpenForRead(fileName.c_str()))
00249 {
00250 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00251 return(samIn.GetStatus());
00252 }
00253
00254 if(!samIn.ReadHeader(header))
00255 {
00256 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00257 return(samIn.GetStatus());
00258 }
00259
00260
00261 samIn.setSortedValidation(SamFile::COORDINATE);
00262
00263
00264 while (samIn.ReadRecord(header, record))
00265 {
00266 uint16_t flag = record.getFlag();
00267 if(flag & excludeFlag)
00268 {
00269
00270
00271 continue;
00272 }
00273 if((flag & includeFlag) != includeFlag)
00274 {
00275
00276
00277 continue;
00278 }
00279 processAlignment(record);
00280 }
00281
00282 flushPileup();
00283
00284 int returnValue = 0;
00285 if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
00286 {
00287
00288 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00289 returnValue = samIn.GetStatus();
00290 }
00291 return(returnValue);
00292 }
00293
00294
00295 template <class PILEUP_TYPE, class FUNC_CLASS>
00296 void Pileup<PILEUP_TYPE, FUNC_CLASS>::processAlignment(SamRecord& record)
00297 {
00298 int refPosition = record.get0BasedPosition();
00299 int refID = record.getReferenceID();
00300
00301
00302
00303 flushPileup(refID, refPosition);
00304
00305
00306
00307
00308 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
00309 {
00310 addAlignmentPosition(refPosition, record);
00311 }
00312 }
00313
00314
00315 template <class PILEUP_TYPE, class FUNC_CLASS>
00316 void Pileup<PILEUP_TYPE, FUNC_CLASS>::processAlignmentRegion(SamRecord& record,
00317 int startPos,
00318 int endPos,
00319 PosList* excludeList)
00320 {
00321 int refPosition = record.get0BasedPosition();
00322 int refID = record.getReferenceID();
00323
00324
00325
00326 flushPileup(refID, refPosition);
00327
00328
00329
00330 if(startPos > refPosition)
00331 {
00332 refPosition = startPos;
00333 }
00334
00335
00336
00337
00338 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
00339 {
00340
00341
00342
00343 if((endPos != -1) && (refPosition >= endPos))
00344 {
00345 break;
00346 }
00347
00348
00349 bool addPos = true;
00350 if(excludeList != NULL)
00351 {
00352
00353 if(excludeList->hasPosition(refID, refPosition))
00354 {
00355
00356 addPos = false;
00357 }
00358 }
00359 if(addPos)
00360 {
00361 addAlignmentPosition(refPosition, record);
00362 }
00363 }
00364 }
00365
00366
00367 template <class PILEUP_TYPE, class FUNC_CLASS>
00368 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup()
00369 {
00370
00371
00372
00373 while ((pileupHead <= pileupTail) && (pileupTail != -1))
00374 {
00375 flushPileup(pileupHead+1);
00376 }
00377 pileupStart = pileupHead = 0;
00378 pileupTail = -1;
00379 }
00380
00381
00382
00383 template <class PILEUP_TYPE, class FUNC_CLASS>
00384 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addAlignmentPosition(int refPosition,
00385 SamRecord& record)
00386 {
00387 int offset = pileupPosition(refPosition);
00388
00389 if((offset < 0) || (offset >= pileupWindow))
00390 {
00391 std::cerr << "Pileup Buffer Overflow: position = " << refPosition
00392 << "; refID = " << record.getReferenceID()
00393 << "; recStartPos = " << record.get1BasedPosition()
00394 << "; pileupStart = " << pileupStart
00395 << "; pileupHead = " << pileupHead
00396 << "; pileupTail = " << pileupTail;
00397 }
00398
00399 addElement(myElements[offset], record);
00400 }
00401
00402
00403 template <class PILEUP_TYPE, class FUNC_CLASS>
00404 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int refID, int position)
00405 {
00406
00407 if(refID != myCurrentRefID)
00408 {
00409
00410 flushPileup();
00411 myCurrentRefID = refID;
00412 }
00413 else
00414 {
00415
00416 flushPileup(position);
00417 }
00418 }
00419
00420
00421 template <class PILEUP_TYPE, class FUNC_CLASS>
00422 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int position)
00423 {
00424
00425
00426 while((pileupHead < position) && (pileupHead <= pileupTail))
00427 {
00428 analyzeHead();
00429
00430 pileupHead++;
00431
00432 if(pileupHead - pileupStart >= pileupWindow)
00433 pileupStart += pileupWindow;
00434 }
00435
00436 if(pileupHead > pileupTail)
00437 {
00438
00439 pileupHead = pileupStart = 0;
00440 pileupTail = -1;
00441 }
00442 }
00443
00444
00445
00446
00447
00448 template <class PILEUP_TYPE, class FUNC_CLASS>
00449 int Pileup<PILEUP_TYPE, FUNC_CLASS>::pileupPosition(int position)
00450 {
00451
00452 if(pileupTail == -1)
00453 {
00454 pileupStart = pileupHead = position;
00455
00456
00457 resetElement(myElements[0], position);
00458 pileupTail = position;
00459 return(0);
00460 }
00461
00462
00463 if((position < pileupHead) || (position > (pileupHead + pileupWindow)))
00464 {
00465 String errorMessage =
00466 "Overflow on the pileup buffer: specifiedPosition = ";
00467 errorMessage += position;
00468 errorMessage += ", pileup buffer start position: ";
00469 errorMessage += pileupHead;
00470 errorMessage += ", pileup buffer end position: ";
00471 errorMessage += pileupHead + pileupWindow;
00472
00473 throw std::runtime_error(errorMessage.c_str());
00474 }
00475
00476
00477 int offset = position - pileupStart;
00478
00479 if(offset >= pileupWindow)
00480 {
00481 offset -= pileupWindow;
00482 }
00483
00484
00485
00486 while(position > pileupTail)
00487 {
00488
00489
00490 ++pileupTail;
00491
00492
00493 offset = pileupTail - pileupStart;
00494 if(offset >= pileupWindow)
00495 {
00496 offset -= pileupWindow;
00497 }
00498
00499
00500
00501 resetElement(myElements[offset], pileupTail);
00502 }
00503
00504 return(offset);
00505 }
00506
00507
00508 template <class PILEUP_TYPE, class FUNC_CLASS>
00509 void Pileup<PILEUP_TYPE, FUNC_CLASS>::resetElement(PILEUP_TYPE& element,
00510 int position)
00511 {
00512 element.reset(position);
00513 }
00514
00515
00516 template <class PILEUP_TYPE, class FUNC_CLASS>
00517 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addElement(PILEUP_TYPE& element,
00518 SamRecord& record)
00519 {
00520 element.addEntry(record);
00521 }
00522
00523
00524 template <class PILEUP_TYPE, class FUNC_CLASS>
00525 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeElement(PILEUP_TYPE& element)
00526 {
00527 myAnalyzeFuncPtr(element);
00528 }
00529
00530
00531 template <class PILEUP_TYPE, class FUNC_CLASS>
00532 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeHead()
00533 {
00534 myAnalyzeFuncPtr(myElements[pileupHead - pileupStart]);
00535 }
00536
00537
00538 #endif