00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __PILEUP_H__
00019 #define __PILEUP_H__
00020
00021 #include <stdexcept>
00022 #include "SamFile.h"
00023 #include "PosList.h"
00024
00025 template <class PILEUP_TYPE>
00026 class defaultPileup
00027 {
00028 public:
00029 bool operator() (PILEUP_TYPE& element)
00030 {
00031 element.analyze();
00032 return(true);
00033 }
00034 void analyze(PILEUP_TYPE element)
00035 {
00036 element.analyze();
00037 }
00038 };
00039
00040 template <class PILEUP_TYPE>
00041 void defaultPileupAnalyze(PILEUP_TYPE& element)
00042 {
00043 element.analyze();
00044 }
00045
00046
00047 template <class PILEUP_TYPE,
00048 class FUNC_CLASS = defaultPileup<PILEUP_TYPE> >
00049 class Pileup
00050 {
00051 public:
00052 Pileup(const FUNC_CLASS& fp = FUNC_CLASS());
00053
00054 Pileup(int window, const FUNC_CLASS& fp = FUNC_CLASS());
00055
00056
00057 Pileup(const std::string& refSeqFileName,
00058 const FUNC_CLASS& fp = FUNC_CLASS());
00059
00060
00061 Pileup(int window, const std::string& refSeqFileName,
00062 const FUNC_CLASS& fp = FUNC_CLASS());
00063
00064 ~Pileup();
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077 virtual int processFile(const std::string& fileName,
00078 uint16_t excludeFlag = 0x0704,
00079 uint16_t includeFlag = 0);
00080
00081
00082 virtual void processAlignment(SamRecord& record);
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093 virtual void processAlignmentRegion(SamRecord& record,
00094 int startPos,
00095 int endPos,
00096 PosList* excludeList = NULL);
00097
00098
00099 void flushPileup();
00100
00101 protected:
00102 FUNC_CLASS myAnalyzeFuncPtr;
00103
00104
00105 void addAlignmentPosition(int refPosition, SamRecord& record);
00106
00107
00108 virtual void flushPileup(int refID, int refPosition);
00109 void flushPileup(int refPosition);
00110
00111
00112
00113
00114 int pileupPosition(int refPosition);
00115
00116 virtual void resetElement(PILEUP_TYPE& element, int position);
00117 virtual void addElement(PILEUP_TYPE& element, SamRecord& record);
00118 virtual void analyzeElement(PILEUP_TYPE& element);
00119 virtual void analyzeHead();
00120
00121 std::vector<PILEUP_TYPE> myElements;
00122
00123 int pileupStart;
00124 int pileupHead;
00125 int pileupTail;
00126 int pileupWindow;
00127
00128 int myCurrentRefID;
00129
00130 GenomeSequence* myRefPtr;
00131 };
00132
00133
00134 template <class PILEUP_TYPE, class FUNC_CLASS>
00135 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const FUNC_CLASS& fp)
00136 : myAnalyzeFuncPtr(fp),
00137 myElements(),
00138 pileupStart(0),
00139 pileupHead(0),
00140 pileupTail(-1),
00141 pileupWindow(1024),
00142 myCurrentRefID(-2),
00143 myRefPtr(NULL)
00144 {
00145
00146 myElements.resize(pileupWindow);
00147 }
00148
00149
00150 template <class PILEUP_TYPE, class FUNC_CLASS>
00151 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const FUNC_CLASS& fp)
00152 : myAnalyzeFuncPtr(fp),
00153 myElements(),
00154 pileupStart(0),
00155 pileupHead(0),
00156 pileupTail(-1),
00157 pileupWindow(window),
00158 myCurrentRefID(-2),
00159 myRefPtr(NULL)
00160 {
00161
00162 myElements.resize(window);
00163 }
00164
00165
00166 template <class PILEUP_TYPE, class FUNC_CLASS>
00167 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const std::string& refSeqFileName, const FUNC_CLASS& fp)
00168 : myAnalyzeFuncPtr(fp),
00169 myElements(),
00170 pileupStart(0),
00171 pileupHead(0),
00172 pileupTail(-1),
00173 pileupWindow(1024),
00174 myCurrentRefID(-2),
00175 myRefPtr(NULL)
00176 {
00177 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00178
00179
00180 myElements.resize(pileupWindow);
00181
00182 PILEUP_TYPE::setReference(myRefPtr);
00183 }
00184
00185
00186 template <class PILEUP_TYPE, class FUNC_CLASS>
00187 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const std::string& refSeqFileName, const FUNC_CLASS& fp)
00188 : myAnalyzeFuncPtr(fp),
00189 myElements(),
00190 pileupStart(0),
00191 pileupHead(0),
00192 pileupTail(-1),
00193 pileupWindow(window),
00194 myCurrentRefID(-2),
00195 myRefPtr(NULL)
00196 {
00197 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00198
00199
00200 myElements.resize(window);
00201
00202 PILEUP_TYPE::setReference(myRefPtr);
00203 }
00204
00205
00206 template <class PILEUP_TYPE, class FUNC_CLASS>
00207 Pileup<PILEUP_TYPE, FUNC_CLASS>::~Pileup()
00208 {
00209 flushPileup();
00210 if(myRefPtr != NULL)
00211 {
00212 delete myRefPtr;
00213 myRefPtr = NULL;
00214 }
00215 }
00216
00217 template <class PILEUP_TYPE, class FUNC_CLASS>
00218 int Pileup<PILEUP_TYPE, FUNC_CLASS>::processFile(const std::string& fileName,
00219 uint16_t excludeFlag,
00220 uint16_t includeFlag)
00221 {
00222 SamFile samIn;
00223 SamFileHeader header;
00224 SamRecord record;
00225
00226 if(myRefPtr != NULL)
00227 {
00228 samIn.SetReference(myRefPtr);
00229 }
00230
00231 if(!samIn.OpenForRead(fileName.c_str()))
00232 {
00233 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00234 return(samIn.GetStatus());
00235 }
00236
00237 if(!samIn.ReadHeader(header))
00238 {
00239 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00240 return(samIn.GetStatus());
00241 }
00242
00243
00244 samIn.setSortedValidation(SamFile::COORDINATE);
00245
00246
00247 while (samIn.ReadRecord(header, record))
00248 {
00249 uint16_t flag = record.getFlag();
00250 if(flag & excludeFlag)
00251 {
00252
00253
00254 continue;
00255 }
00256 if((flag & includeFlag) != includeFlag)
00257 {
00258
00259
00260 continue;
00261 }
00262 processAlignment(record);
00263 }
00264
00265 flushPileup();
00266
00267 int returnValue = 0;
00268 if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
00269 {
00270
00271 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00272 returnValue = samIn.GetStatus();
00273 }
00274 return(returnValue);
00275 }
00276
00277
00278 template <class PILEUP_TYPE, class FUNC_CLASS>
00279 void Pileup<PILEUP_TYPE, FUNC_CLASS>::processAlignment(SamRecord& record)
00280 {
00281 int refPosition = record.get0BasedPosition();
00282 int refID = record.getReferenceID();
00283
00284
00285
00286 flushPileup(refID, refPosition);
00287
00288
00289
00290
00291 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
00292 {
00293 addAlignmentPosition(refPosition, record);
00294 }
00295 }
00296
00297
00298 template <class PILEUP_TYPE, class FUNC_CLASS>
00299 void Pileup<PILEUP_TYPE, FUNC_CLASS>::processAlignmentRegion(SamRecord& record,
00300 int startPos,
00301 int endPos,
00302 PosList* excludeList)
00303 {
00304 int refPosition = record.get0BasedPosition();
00305 int refID = record.getReferenceID();
00306
00307
00308
00309 flushPileup(refID, refPosition);
00310
00311
00312
00313 if(startPos > refPosition)
00314 {
00315 refPosition = startPos;
00316 }
00317
00318
00319
00320
00321 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
00322 {
00323
00324
00325
00326 if((endPos != -1) && (refPosition >= endPos))
00327 {
00328 break;
00329 }
00330
00331
00332 bool addPos = true;
00333 if(excludeList != NULL)
00334 {
00335
00336 if(excludeList->hasPosition(refID, refPosition))
00337 {
00338
00339 addPos = false;
00340 }
00341 }
00342 if(addPos)
00343 {
00344 addAlignmentPosition(refPosition, record);
00345 }
00346 }
00347 }
00348
00349
00350 template <class PILEUP_TYPE, class FUNC_CLASS>
00351 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup()
00352 {
00353
00354
00355
00356 while ((pileupHead <= pileupTail) && (pileupTail != -1))
00357 {
00358 flushPileup(pileupHead+1);
00359 }
00360 pileupStart = pileupHead = 0;
00361 pileupTail = -1;
00362 }
00363
00364
00365
00366 template <class PILEUP_TYPE, class FUNC_CLASS>
00367 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addAlignmentPosition(int refPosition,
00368 SamRecord& record)
00369 {
00370 int offset = pileupPosition(refPosition);
00371
00372 if((offset < 0) || (offset >= pileupWindow))
00373 {
00374 std::cerr << "Pileup Buffer Overflow: position = " << refPosition
00375 << "; refID = " << record.getReferenceID()
00376 << "; recStartPos = " << record.get1BasedPosition()
00377 << "; pileupStart = " << pileupStart
00378 << "; pileupHead = " << pileupHead
00379 << "; pileupTail = " << pileupTail;
00380 }
00381
00382 addElement(myElements[offset], record);
00383 }
00384
00385
00386 template <class PILEUP_TYPE, class FUNC_CLASS>
00387 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int refID, int position)
00388 {
00389
00390 if(refID != myCurrentRefID)
00391 {
00392
00393 flushPileup();
00394 myCurrentRefID = refID;
00395 }
00396 else
00397 {
00398
00399 flushPileup(position);
00400 }
00401 }
00402
00403
00404 template <class PILEUP_TYPE, class FUNC_CLASS>
00405 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int position)
00406 {
00407
00408
00409 while((pileupHead < position) && (pileupHead <= pileupTail))
00410 {
00411 analyzeHead();
00412
00413 pileupHead++;
00414
00415 if(pileupHead - pileupStart >= pileupWindow)
00416 pileupStart += pileupWindow;
00417 }
00418
00419 if(pileupHead > pileupTail)
00420 {
00421
00422 pileupHead = pileupStart = 0;
00423 pileupTail = -1;
00424 }
00425 }
00426
00427
00428
00429
00430
00431 template <class PILEUP_TYPE, class FUNC_CLASS>
00432 int Pileup<PILEUP_TYPE, FUNC_CLASS>::pileupPosition(int position)
00433 {
00434
00435 if(pileupTail == -1)
00436 {
00437 pileupStart = pileupHead = position;
00438
00439
00440 resetElement(myElements[0], position);
00441 pileupTail = position;
00442 return(0);
00443 }
00444
00445
00446 if((position < pileupHead) || (position > (pileupHead + pileupWindow)))
00447 {
00448 String errorMessage =
00449 "Overflow on the pileup buffer: specifiedPosition = ";
00450 errorMessage += position;
00451 errorMessage += ", pileup buffer start position: ";
00452 errorMessage += pileupHead;
00453 errorMessage += ", pileup buffer end position: ";
00454 errorMessage += pileupHead + pileupWindow;
00455
00456 throw std::runtime_error(errorMessage.c_str());
00457 }
00458
00459
00460 int offset = position - pileupStart;
00461
00462 if(offset >= pileupWindow)
00463 {
00464 offset -= pileupWindow;
00465 }
00466
00467
00468
00469 while(position > pileupTail)
00470 {
00471
00472
00473 ++pileupTail;
00474
00475
00476 offset = pileupTail - pileupStart;
00477 if(offset >= pileupWindow)
00478 {
00479 offset -= pileupWindow;
00480 }
00481
00482
00483
00484 resetElement(myElements[offset], pileupTail);
00485 }
00486
00487 return(offset);
00488 }
00489
00490
00491 template <class PILEUP_TYPE, class FUNC_CLASS>
00492 void Pileup<PILEUP_TYPE, FUNC_CLASS>::resetElement(PILEUP_TYPE& element,
00493 int position)
00494 {
00495 element.reset(position);
00496 }
00497
00498
00499 template <class PILEUP_TYPE, class FUNC_CLASS>
00500 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addElement(PILEUP_TYPE& element,
00501 SamRecord& record)
00502 {
00503 element.addEntry(record);
00504 }
00505
00506
00507 template <class PILEUP_TYPE, class FUNC_CLASS>
00508 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeElement(PILEUP_TYPE& element)
00509 {
00510 myAnalyzeFuncPtr(element);
00511 }
00512
00513
00514 template <class PILEUP_TYPE, class FUNC_CLASS>
00515 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeHead()
00516 {
00517 myAnalyzeFuncPtr(myElements[pileupHead - pileupStart]);
00518 }
00519
00520
00521 #endif