00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __PILEUP_H__
00019 #define __PILEUP_H__
00020
00021 #include <stdexcept>
00022 #include "SamFile.h"
00023
00024 template <class PILEUP_TYPE>
00025 class defaultPileup
00026 {
00027 public:
00028 bool operator() (PILEUP_TYPE& element)
00029 {
00030 element.analyze();
00031 return(true);
00032 }
00033 void analyze(PILEUP_TYPE element)
00034 {
00035 element.analyze();
00036 }
00037 };
00038
00039 template <class PILEUP_TYPE>
00040 void defaultPileupAnalyze(PILEUP_TYPE& element)
00041 {
00042 element.analyze();
00043 }
00044
00045
00046 template <class PILEUP_TYPE,
00047 class FUNC_CLASS = defaultPileup<PILEUP_TYPE> >
00048 class Pileup
00049 {
00050 public:
00051 Pileup(const FUNC_CLASS& fp = FUNC_CLASS());
00052
00053 Pileup(int window, const FUNC_CLASS& fp = FUNC_CLASS());
00054
00055
00056 Pileup(const std::string& refSeqFileName,
00057 const FUNC_CLASS& fp = FUNC_CLASS());
00058
00059
00060 Pileup(int window, const std::string& refSeqFileName,
00061 const FUNC_CLASS& fp = FUNC_CLASS());
00062
00063 ~Pileup();
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 virtual int processFile(const std::string& fileName,
00077 uint16_t excludeFlag = 0x0704,
00078 uint16_t includeFlag = 0);
00079
00080
00081
00082 virtual void processAlignment(SamRecord& record);
00083
00084
00085 void flushPileup();
00086
00087 protected:
00088 FUNC_CLASS myAnalyzeFuncPtr;
00089
00090
00091 void addAlignmentPosition(int refPosition, SamRecord& record);
00092
00093
00094 virtual void flushPileup(int refID, int refPosition);
00095 void flushPileup(int refPosition);
00096
00097
00098
00099
00100 int pileupPosition(int refPosition);
00101
00102 virtual void resetElement(PILEUP_TYPE& element, int position);
00103 virtual void addElement(PILEUP_TYPE& element, SamRecord& record);
00104 virtual void analyzeElement(PILEUP_TYPE& element);
00105 virtual void analyzeHead();
00106
00107 std::vector<PILEUP_TYPE> myElements;
00108
00109 int pileupStart;
00110 int pileupHead;
00111 int pileupTail;
00112 int pileupWindow;
00113
00114 int myCurrentRefID;
00115
00116 GenomeSequence* myRefPtr;
00117 };
00118
00119
00120 template <class PILEUP_TYPE, class FUNC_CLASS>
00121 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const FUNC_CLASS& fp)
00122 : myAnalyzeFuncPtr(fp),
00123 myElements(),
00124 pileupStart(0),
00125 pileupHead(0),
00126 pileupTail(-1),
00127 pileupWindow(1024),
00128 myCurrentRefID(-2),
00129 myRefPtr(NULL)
00130 {
00131
00132 myElements.resize(pileupWindow);
00133 }
00134
00135
00136 template <class PILEUP_TYPE, class FUNC_CLASS>
00137 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const FUNC_CLASS& fp)
00138 : myAnalyzeFuncPtr(fp),
00139 myElements(),
00140 pileupStart(0),
00141 pileupHead(0),
00142 pileupTail(-1),
00143 pileupWindow(window),
00144 myCurrentRefID(-2),
00145 myRefPtr(NULL)
00146 {
00147
00148 myElements.resize(window);
00149 }
00150
00151
00152 template <class PILEUP_TYPE, class FUNC_CLASS>
00153 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(const std::string& refSeqFileName, const FUNC_CLASS& fp)
00154 : myAnalyzeFuncPtr(fp),
00155 myElements(),
00156 pileupStart(0),
00157 pileupHead(0),
00158 pileupTail(-1),
00159 pileupWindow(1024),
00160 myCurrentRefID(-2),
00161 myRefPtr(NULL)
00162 {
00163 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00164
00165
00166 myElements.resize(pileupWindow);
00167
00168 PILEUP_TYPE::setReference(myRefPtr);
00169 }
00170
00171
00172 template <class PILEUP_TYPE, class FUNC_CLASS>
00173 Pileup<PILEUP_TYPE, FUNC_CLASS>::Pileup(int window, const std::string& refSeqFileName, const FUNC_CLASS& fp)
00174 : myAnalyzeFuncPtr(fp),
00175 myElements(),
00176 pileupStart(0),
00177 pileupHead(0),
00178 pileupTail(-1),
00179 pileupWindow(window),
00180 myCurrentRefID(-2),
00181 myRefPtr(NULL)
00182 {
00183 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
00184
00185
00186 myElements.resize(window);
00187
00188 PILEUP_TYPE::setReference(myRefPtr);
00189 }
00190
00191
00192 template <class PILEUP_TYPE, class FUNC_CLASS>
00193 Pileup<PILEUP_TYPE, FUNC_CLASS>::~Pileup()
00194 {
00195 if(myRefPtr != NULL)
00196 {
00197 delete myRefPtr;
00198 myRefPtr = NULL;
00199 }
00200 }
00201
00202 template <class PILEUP_TYPE, class FUNC_CLASS>
00203 int Pileup<PILEUP_TYPE, FUNC_CLASS>::processFile(const std::string& fileName,
00204 uint16_t excludeFlag,
00205 uint16_t includeFlag)
00206 {
00207 SamFile samIn;
00208 SamFileHeader header;
00209 SamRecord record;
00210
00211 if(myRefPtr != NULL)
00212 {
00213 samIn.SetReference(myRefPtr);
00214 }
00215
00216 if(!samIn.OpenForRead(fileName.c_str()))
00217 {
00218 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00219 return(samIn.GetStatus());
00220 }
00221
00222 if(!samIn.ReadHeader(header))
00223 {
00224 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00225 return(samIn.GetStatus());
00226 }
00227
00228
00229 samIn.setSortedValidation(SamFile::COORDINATE);
00230
00231
00232 while (samIn.ReadRecord(header, record))
00233 {
00234 uint16_t flag = record.getFlag();
00235 if(flag & excludeFlag)
00236 {
00237
00238
00239 continue;
00240 }
00241 if((flag & includeFlag) != includeFlag)
00242 {
00243
00244
00245 continue;
00246 }
00247 processAlignment(record);
00248 }
00249
00250 flushPileup();
00251
00252 int returnValue = 0;
00253 if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
00254 {
00255
00256 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
00257 returnValue = samIn.GetStatus();
00258 }
00259 return(returnValue);
00260 }
00261
00262
00263 template <class PILEUP_TYPE, class FUNC_CLASS>
00264 void Pileup<PILEUP_TYPE, FUNC_CLASS>::processAlignment(SamRecord& record)
00265 {
00266 int refPosition = record.get0BasedPosition();
00267 int refID = record.getReferenceID();
00268
00269
00270
00271 flushPileup(refID, refPosition);
00272
00273
00274
00275
00276 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
00277 {
00278 addAlignmentPosition(refPosition, record);
00279 }
00280 }
00281
00282
00283 template <class PILEUP_TYPE, class FUNC_CLASS>
00284 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup()
00285 {
00286
00287
00288
00289 while ((pileupHead <= pileupTail) && (pileupTail != -1))
00290 {
00291 flushPileup(pileupHead+1);
00292 }
00293 pileupStart = pileupHead = 0;
00294 pileupTail = -1;
00295 }
00296
00297
00298
00299 template <class PILEUP_TYPE, class FUNC_CLASS>
00300 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addAlignmentPosition(int refPosition,
00301 SamRecord& record)
00302 {
00303 int offset = pileupPosition(refPosition);
00304
00305 if((offset < 0) || (offset >= pileupWindow))
00306 {
00307 std::cerr << "Pileup Buffer Overflow: position = " << refPosition
00308 << "; refID = " << record.getReferenceID()
00309 << "; recStartPos = " << record.get1BasedPosition()
00310 << "; pileupStart = " << pileupStart
00311 << "; pileupHead = " << pileupHead
00312 << "; pileupTail = " << pileupTail;
00313 }
00314
00315 addElement(myElements[offset], record);
00316 }
00317
00318
00319 template <class PILEUP_TYPE, class FUNC_CLASS>
00320 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int refID, int position)
00321 {
00322
00323 if(refID != myCurrentRefID)
00324 {
00325
00326 flushPileup();
00327 myCurrentRefID = refID;
00328
00329
00330 pileupStart = pileupHead = position;
00331 }
00332 else
00333 {
00334
00335 flushPileup(position);
00336 }
00337 }
00338
00339
00340 template <class PILEUP_TYPE, class FUNC_CLASS>
00341 void Pileup<PILEUP_TYPE, FUNC_CLASS>::flushPileup(int position)
00342 {
00343
00344
00345 while((pileupHead < position) && (pileupHead <= pileupTail))
00346 {
00347 analyzeHead();
00348
00349 pileupHead++;
00350
00351 if(pileupHead - pileupStart >= pileupWindow)
00352 pileupStart += pileupWindow;
00353 }
00354
00355
00356
00357 if(pileupHead != position)
00358 {
00359 pileupHead = pileupStart = position;
00360 }
00361 }
00362
00363
00364
00365
00366
00367 template <class PILEUP_TYPE, class FUNC_CLASS>
00368 int Pileup<PILEUP_TYPE, FUNC_CLASS>::pileupPosition(int position)
00369 {
00370
00371 if(pileupTail == -1)
00372 {
00373 pileupStart = pileupHead = position;
00374 }
00375
00376
00377 if((position < pileupHead) || (position > (pileupHead + pileupWindow)))
00378 {
00379 String errorMessage =
00380 "Overflow on the pileup buffer: specifiedPosition = ";
00381 errorMessage += position;
00382 errorMessage += ", pileup buffer start position: ";
00383 errorMessage += pileupHead;
00384 errorMessage += ", pileup buffer end position: ";
00385 errorMessage += pileupHead + pileupWindow;
00386
00387 throw std::runtime_error(errorMessage.c_str());
00388 }
00389
00390 int offset = position - pileupStart;
00391
00392 if(offset >= pileupWindow)
00393 {
00394 offset -= pileupWindow;
00395 }
00396
00397 if(position > pileupTail)
00398 {
00399
00400
00401 resetElement(myElements[offset], position);
00402 pileupTail = position;
00403 }
00404
00405 return(offset);
00406 }
00407
00408
00409 template <class PILEUP_TYPE, class FUNC_CLASS>
00410 void Pileup<PILEUP_TYPE, FUNC_CLASS>::resetElement(PILEUP_TYPE& element,
00411 int position)
00412 {
00413 element.reset(position);
00414 }
00415
00416
00417 template <class PILEUP_TYPE, class FUNC_CLASS>
00418 void Pileup<PILEUP_TYPE, FUNC_CLASS>::addElement(PILEUP_TYPE& element,
00419 SamRecord& record)
00420 {
00421 element.addEntry(record);
00422 }
00423
00424
00425 template <class PILEUP_TYPE, class FUNC_CLASS>
00426 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeElement(PILEUP_TYPE& element)
00427 {
00428 myAnalyzeFuncPtr(element);
00429 }
00430
00431
00432 template <class PILEUP_TYPE, class FUNC_CLASS>
00433 void Pileup<PILEUP_TYPE, FUNC_CLASS>::analyzeHead()
00434 {
00435 myAnalyzeFuncPtr(myElements[pileupHead - pileupStart]);
00436 }
00437
00438
00439 #endif