libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #if !defined(_SMITH_WATERMAN_) 00019 #define _SMITH_WATERMAN_ 00020 00021 #include <string.h> // for inline use of strcat, etc 00022 #include <limits.h> // for INT_MAX 00023 #include <stdint.h> // for uint32_t and friends 00024 00025 // 00026 // This file implements a bi-directional, banded Smith Waterman matching 00027 // algorithm. 00028 // 00029 // The design is dictated by several observations: 00030 // 00031 // - building the full matrix H for the read and reference is slow, 00032 // so we perform it only for a band down the diagonal, thus speeding 00033 // up the algorithm at the cost of reduced detection of insert/deletes. 00034 // 00035 // - we must minimize data copying 00036 // 00037 // - we must have the ability to test the algorithm quickly and simply, 00038 // hence we implement the class as a template so that this file doesn't 00039 // have to depend on Karma's GenomeSequence object, which is a relatively 00040 // heavy weight object to do testing against. 00041 // 00042 // - because Karma uses index words to determine match candidate locations 00043 // across the genome, and because we use the banded Smith Waterman approach, 00044 // we must provide bi-directional Smith Waterman matching. See example below. 00045 // 00046 // To fully understand the examples below, make sure you understand 00047 // Phred quality scores, CIGAR strings, and pattern matching. 00048 // 00049 // Simple Functional Examples: 00050 // 00051 // Given a read, a read quality and a reference, we want to obtain some 00052 // measure of how well that read matches the reference at that given location. 00053 // So, we have: 00054 // Read: "ATCG" 00055 // Quality: "$$$$" (Phred scores - all are decimal 3) 00056 // Reference: "ATCG" 00057 // We expect: sumQ = 0, and Cigar "4M" 00058 // 00059 // Complex Functional Examples: 00060 // Read: "AATCG" 00061 // Quality: "$$$$$" (Phred scores - all are decimal 3) 00062 // Reference: "ATCG" 00063 // We expect: sumQ = 3, and Cigar "1M1I3M" 00064 // 00065 // Backwards matching: 00066 // It is harder for me to construct a clear example, so imagine a read with 00067 // cumulative inserts or deletes that sum up to a number larger than the 00068 // width of the band along the diagonal. If we perform SW in the 'wrong' 00069 // direction, we will lose the read entirely, whereas if we start from the 00070 // end that is known to be matching, we may obtain a good match for the bulk 00071 // of the read. 00072 // 00073 // For example, you could imagine a read where the first 10 bases had a mess 00074 // of inserts, but then was clean for the next 100 bases. You'd want it 00075 // to match as many of the good bases as practical, even if you knew you were 00076 // losing information at the end. 00077 // 00078 00079 #include <assert.h> 00080 #include <stdio.h> 00081 #include <stdlib.h> 00082 #include <string> 00083 #include <iostream> 00084 #include <iomanip> 00085 #include <utility> 00086 #include <vector> 00087 00088 #include "CigarRoller.h" 00089 #include "Generic.h" 00090 00091 using std::cout; 00092 using std::cin; 00093 using std::cerr; 00094 using std::setw; 00095 using std::endl; 00096 using std::pair; 00097 using std::vector; 00098 00099 #if !defined(MAX) 00100 #define MAX(x,y) ((x)>(y) ? (x) : (y)) 00101 #endif 00102 #if !defined(MIN) 00103 #define MIN(x,y) ((x)<(y) ? (x) : (y)) 00104 #endif 00105 00106 // 00107 // Implement the core of Smith Waterman as described in: 00108 // http://en.wikipedia.org/wiki/Smith_waterman 00109 // 00110 // The only variation from the basic SW algorithm is the 00111 // use of a banded approach - to limit the algorithm to 00112 // a band along the diagonal. This limits the maximum 00113 // additive number of indels, but allows an O(c*M) approach, 00114 // where c is the constant max number of indels allowed. 00115 // 00116 // This is implemented as function templates because for testing, it is easier 00117 // to use character arrays. In our mapper, we will be using a 00118 // combination of a String object for the read and the genome object 00119 // as the reference. Both can be indexed, and give a character (base) 00120 // value, but the code would be duplicated if we implement SW for 00121 // each type of argument. 00122 // 00123 // Htype -> the type of the array H cell (8 or 16 bit unsigned int) 00124 // Atype -> the read string type (must have Atype::operator [] defined) 00125 // 00126 template <int maxReadLengthH, int maxReferenceLengthH, typename HCellType, typename Atype, typename Btype, typename QualityType, typename readIndexType, typename referenceIndexType> 00127 class SmithWaterman 00128 { 00129 public: 00130 00131 // 00132 // XXX in theory, this weight should be sensitive 00133 // to the quality of the base, and should have 00134 // an appropriate cost for an indel, as well. 00135 // 00136 // I think we need to get rid of this, since it is 00137 // basically wrong for our needs. 00138 // 00139 struct weight 00140 { 00141 weight() 00142 { 00143 match=2; 00144 misMatch=-1; 00145 insert=-1; 00146 del=-1; 00147 }; 00148 00149 int match; 00150 int misMatch; 00151 int insert; 00152 int del; 00153 }; 00154 00155 HCellType H[maxReadLengthH][maxReferenceLengthH]; 00156 Atype *A; 00157 Btype *B; 00158 QualityType *qualities; 00159 00160 int m,n; 00161 readIndexType MOffset; // constant offset to m (read) 00162 referenceIndexType NOffset; // constant offset to n (reference) 00163 weight w; 00164 int allowedInsertDelete; 00165 int direction; 00166 int gapOpenCount; 00167 int gapCloseCount; 00168 int gapExtendCount; 00169 vector<pair<int,int> > alignment; 00170 void clearAlignment() 00171 { 00172 alignment.clear(); 00173 } 00174 00175 HCellType maxCostValue; // max Cost value in H 00176 pair<int,int> maxCostPosition; // row/col of max cost value in H 00177 00178 // 00179 // Clear the member variables only. 00180 // To clear H, call clearH(). 00181 // 00182 // In theory, clear() plus set() should be sufficient to 00183 // get a clean run, but I haven't tested this extensively. 00184 // 00185 void clear() 00186 { 00187 maxCostPosition.first = 0; 00188 maxCostPosition.second = 0; 00189 A = NULL; 00190 B = NULL; 00191 qualities = NULL; 00192 m = 0; 00193 n = 0; 00194 MOffset = 0; 00195 NOffset = 0; 00196 allowedInsertDelete = 0; 00197 direction = 0; 00198 gapOpenCount = 0; 00199 gapCloseCount = 0; 00200 gapExtendCount = 0; 00201 } 00202 00203 // caller will be using set* methods to set everything up. 00204 SmithWaterman() 00205 { 00206 clear(); 00207 clearH(); 00208 } 00209 00210 // construct with everything and the kitchen sink: 00211 SmithWaterman( 00212 Atype *A, 00213 QualityType *qualities, 00214 Btype *B, 00215 int m, 00216 int n, 00217 int allowedInsertDelete = INT_MAX, 00218 int direction = 1, 00219 readIndexType MOffset = 0, 00220 referenceIndexType NOffset = 0): 00221 A(A), 00222 qualities(qualities), 00223 B(B), 00224 m(m), 00225 n(n), 00226 allowedInsertDelete(allowedInsertDelete), 00227 direction(direction), 00228 MOffset(MOffset), 00229 NOffset(NOffset), 00230 maxCostValue((HCellType) 0) 00231 { 00232 } 00233 00234 void setRead(Atype *A) 00235 { 00236 this->A = A; 00237 } 00238 void setReadQuality(QualityType *qualities) 00239 { 00240 this->qualities = qualities; 00241 } 00242 void setReference(Btype *B) 00243 { 00244 this->B = B; 00245 } 00246 00247 // Caller may wish to index into the read to do the matching against 00248 // only part of the read. 00249 // NB: the quality length and offset are the same as the read. 00250 void setReadLength(int m) 00251 { 00252 this->m = m; 00253 } 00254 void setReadOffset(readIndexType MOffset) 00255 { 00256 this->MOffset = MOffset; 00257 } 00258 00259 // The reference is typically long, and not necessarily a char *, 00260 // so we provide an offset here. If it were always a char *, 00261 // we'd just modify the caller to point directly at the reference 00262 // location. 00263 void setReferenceLength(int n) 00264 { 00265 this->n = n; 00266 } 00267 void setReferenceOffset(referenceIndexType NOffset) 00268 { 00269 this->NOffset = NOffset; 00270 } 00271 00272 // 00273 // Configuration: how wide is the band on the diagonal? 00274 // We should keep this small -- 1, 2, 3 or similar. If 00275 // the value is default (INT_MAX), then the full matrix 00276 // will be built, which is fine, but quite slow. 00277 // 00278 // If this paramater is made smaller than when a previous 00279 // call to populateH was made, clearH will also need to be called. 00280 // 00281 void setAllowedInsertDelete(int allowedInsertDelete = INT_MAX) 00282 { 00283 this->allowedInsertDelete = allowedInsertDelete; 00284 } 00285 00286 // 00287 // Configuration: which end do we begin performing SW matching 00288 // from? We need this because of index 'anchors' in the karma 00289 // matcher. 00290 void setDirection(int direction) 00291 { 00292 this->direction = direction; 00293 } 00294 00295 void clearH() 00296 { 00297 memset(H, 0, sizeof(H)); 00298 } 00299 00300 void populateH() 00301 { 00302 00303 maxCostValue = 0; 00304 00305 for (int i=1; i<=m ; i++) 00306 { 00307 00308 // implement a banded Smith-Waterman approach: 00309 int low = MAX(1, i - allowedInsertDelete); 00310 int high = MIN(n, i + allowedInsertDelete); 00311 00312 for (int j=low; j<=high ; j++) 00313 { 00314 HCellType c; 00315 c = 0; 00316 if (direction>0) c = MAX(c, H[i-1][j-1] + (((*A)[MOffset + i-1]==(*B)[NOffset + j-1]) ? w.match : w.misMatch)); 00317 else c = MAX(c, H[i-1][j-1] + (((*A)[MOffset + m-i+0]==(*B)[NOffset + n-j+0]) ? w.match : w.misMatch)); 00318 c = MAX(c, H[i-1][j] + w.del); 00319 c = MAX(c, H[i][j-1] + w.insert); 00320 H[i][j] = c; 00321 if (c>maxCostValue) 00322 { 00323 maxCostValue = c; 00324 maxCostPosition.first = i; 00325 maxCostPosition.second = j; 00326 } 00327 } 00328 } 00329 } 00330 00331 // 00332 // Given the matrix H as filled in by above routine, print it out. 00333 // 00334 void printH(bool prettyPrint = true) 00335 { 00336 // print the scoring matrix: 00337 for (int i=-1; i<=m ; i++) 00338 { 00339 for (int j=-1; j<=n ; j++) 00340 { 00341 if (prettyPrint) cout << setw(3); 00342 if (i==-1 && j==-1) 00343 { 00344 if (prettyPrint) cout << " "; 00345 else cout << "\t"; 00346 } 00347 else if (j==-1) 00348 { 00349 if (!prettyPrint) cout << "\t"; 00350 if (i==0) cout << "-"; 00351 else cout << (*A)[MOffset + direction>0 ? i-1 : m - i]; 00352 } 00353 else if (i==-1) 00354 { 00355 if (!prettyPrint) cout << "\t"; 00356 if (j==0) cout << "-"; 00357 else cout << (*B)[NOffset + direction>0 ? j-1 : n - j]; 00358 } 00359 else 00360 { 00361 if (!prettyPrint) cout << "\t"; 00362 cout << H[i][j]; 00363 } 00364 } 00365 cout << endl; 00366 } 00367 } 00368 00369 void debugPrint(bool doPrintH = true) 00370 { 00371 if (doPrintH) printH(); 00372 cout << "maxCostPosition = " << maxCostPosition << std::endl; 00373 if (alignment.empty()) cout << "alignment vector is empty.\n"; 00374 else 00375 { 00376 cout << "alignment vector:\n"; 00377 for (vector<pair<int,int> >::iterator i=alignment.begin(); i < alignment.end(); i++) 00378 { 00379 cout << (i - alignment.begin()) << ": " << *i << "\n"; 00380 } 00381 } 00382 cout << std::endl; 00383 } 00384 00385 // 00386 // Given the Matrix H as filled in by populateH, fill in the 00387 // alignment vector with the indeces of the optimal match. 00388 // 00389 void populateAlignment() 00390 { 00391 alignment.clear(); 00392 int i = m, j = n; 00393 00394 i = maxCostPosition.first; 00395 j = maxCostPosition.second; 00396 00397 // 00398 // Stop when we either reach zero cost cell or 00399 // when we reach the upper left corner of H. 00400 // A zero cost cell to the lower right means we 00401 // are soft clipping that end. 00402 // 00403 while (H[i][j] > 0 || (i>0 && j>0)) 00404 { 00405 // #define DEBUG_ALIGNMENT_VECTOR 00406 #if defined(DEBUG_ALIGNMENT_VECTOR) 00407 cout << "alignment.push_back(" << i << ", " << j << ")" << endl; 00408 #endif 00409 alignment.push_back(pair<int,int>(i,j)); 00410 if (H[i-1][j-1]>=H[i-1][j] && H[i-1][j-1]>=H[i][j-1]) 00411 { 00412 // diagonal upper left cell is biggest 00413 i--; 00414 j--; 00415 } 00416 else if (H[i-1][j] < H[i][j-1]) 00417 { 00418 // upper cell is biggest 00419 j--; 00420 } 00421 else 00422 { 00423 // left cell is biggest 00424 i--; 00425 } 00426 } 00427 alignment.push_back(pair<int,int>(i,j)); 00428 #if defined(DEBUG_ALIGNMENT_VECTOR) 00429 cout << "alignment.push_back(" << i << ", " << j << ")" << endl; 00430 cout << "alignment.size(): " << alignment.size() << endl; 00431 #endif 00432 } 00433 00434 // 00435 // Compute the sumQ for a read that has been mapped using populateH(). 00436 // 00437 // In the simplest case, the read lies on the diagonal of the 00438 // matrix H, which means it has only matches and mismatches: 00439 // no inserts or deletes. 00440 // 00441 // However, in general, it is possible to have 0 or more insert, 00442 // delete, mismatch and soft clipped bases in the read, so we 00443 // need to accomodate all of those variations. 00444 // 00445 // XXX finish this. 00446 // 00447 00448 int getSumQ() 00449 { 00450 if (direction>0) return getSumQForward(); 00451 else return getSumQBackward(); 00452 } 00453 00454 int getSumQForward() 00455 { 00456 int sumQ = 0; 00457 vector<pair<int,int> >::reverse_iterator i; 00458 00459 for (i=alignment.rbegin(); i < alignment.rend() - 1; i++) 00460 { 00461 // #define DEBUG_GETSUMQ 00462 #if defined(DEBUG_GETSUMQ) 00463 cout << *i << ": "; 00464 #endif 00465 if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second + 1)) 00466 { 00467 // match/mismatch 00468 #if defined(DEBUG_GETSUMQ) 00469 cout << "Match/Mismatch"; 00470 #endif 00471 if ((*A)[MOffset + (*i).first] != (*B)[NOffset + (*i).second]) 00472 sumQ += (*qualities)[MOffset + (*i).first] - '!'; 00473 } 00474 else if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second)) 00475 { 00476 // insert? 00477 #if defined(DEBUG_GETSUMQ) 00478 cout << "Insert"; 00479 #endif 00480 sumQ += 50; 00481 } 00482 else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second + 1)) 00483 { 00484 // delete? 00485 #if defined(DEBUG_GETSUMQ) 00486 cout << "Delete"; 00487 #endif 00488 sumQ += 50; 00489 } 00490 } 00491 #if defined(DEBUG_GETSUMQ) 00492 cout << endl; 00493 #endif 00494 return sumQ; 00495 } 00496 00497 int getSumQBackward() 00498 { 00499 int sumQ = 0; 00500 vector<pair<int,int> >::iterator i; 00501 00502 for (i=alignment.begin(); i < alignment.end() - 1; i++) 00503 { 00504 #if defined(DEBUG_GETSUMQ) 00505 cout << *i << ": "; 00506 #endif 00507 if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second - 1)) 00508 { 00509 // match/mismatch 00510 #if defined(DEBUG_GETSUMQ) 00511 cout << "Match/Mismatch"; 00512 #endif 00513 if ((*A)[MOffset + m - (*i).first] != (*B)[NOffset + n - (*i).second]) 00514 sumQ += (*qualities)[MOffset + m - (*i).first] - '!'; 00515 } 00516 else if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second)) 00517 { 00518 // insert? 00519 #if defined(DEBUG_GETSUMQ) 00520 cout << "Insert?"; 00521 #endif 00522 sumQ += 50; 00523 } 00524 else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second - 1)) 00525 { 00526 // delete? 00527 #if defined(DEBUG_GETSUMQ) 00528 cout << "Delete?"; 00529 #endif 00530 sumQ += 50; 00531 } 00532 } 00533 #if defined(DEBUG_GETSUMQ) 00534 cout << endl; 00535 #endif 00536 return sumQ; 00537 } 00538 00539 #if 0 00540 int getSumQ() 00541 { 00542 vector<pair<int,int> >::reverse_iterator i; 00543 int sumQ = 0; 00544 for (i=alignment.rbegin(); i < alignment.rend() - 1; i++) 00545 { 00546 #if defined(DEBUG_ALIGNMENT_VECTOR) 00547 cout << "i: " << i - alignment.rbegin() << *i << endl; 00548 #endif 00549 // XXX NOT THIS SIMPLE - need to account for indels 00550 if (direction>0) 00551 { 00552 if ((*A)[MOffset + (*i).first] != (*B)[NOffset + (*i).second]) 00553 sumQ += (*qualities)[MOffset + (*i).first] - '!'; 00554 } 00555 else 00556 { 00557 // m and n are sizes, first and second are 1 based offsets 00558 if ((*A)[MOffset + m - (*i).first] != (*B)[NOffset + n - (*i).second]) 00559 sumQ += (*qualities)[MOffset + m - (*i).first] - '!'; 00560 } 00561 } 00562 return sumQ; 00563 } 00564 #endif 00565 00566 // 00567 // Append cigar operations to an existing cigar list. 00568 // 00569 // XXX we no longer need the CigarRoller += methods. 00570 // 00571 // In this case, the Smith Waterman array H was created from 00572 // the read and reference in the forward direction. 00573 // 00574 void rollCigarForward(CigarRoller &cigar) 00575 { 00576 vector<pair<int,int> >::reverse_iterator i; 00577 00578 for (i=alignment.rbegin(); i < alignment.rend() - 1; i++) 00579 { 00580 // #define DEBUG_CIGAR 00581 #if defined(DEBUG_CIGAR) 00582 cout << *i << ": "; 00583 #endif 00584 if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second + 1)) 00585 { 00586 // match/mismatch 00587 #if defined(DEBUG_CIGAR) 00588 cout << "Match/Mismatch"; 00589 #endif 00590 cigar.Add(CigarRoller::match, 1); 00591 } 00592 else if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second)) 00593 { 00594 // insert? 00595 #if defined(DEBUG_CIGAR) 00596 cout << "Insert"; 00597 #endif 00598 cigar.Add(CigarRoller::insert, 1); 00599 } 00600 else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second + 1)) 00601 { 00602 // delete? 00603 #if defined(DEBUG_CIGAR) 00604 cout << "Delete"; 00605 #endif 00606 cigar.Add(CigarRoller::del, 1); 00607 } 00608 } 00609 // if there is soft clipping, allow for it (::Add will 00610 // ignore if the count is 0): 00611 cigar.Add(CigarRoller::softClip, getSoftClipCount()); 00612 #if defined(DEBUG_CIGAR) 00613 cout << endl; 00614 #endif 00615 } 00616 00617 // 00618 // Append cigar operations to an existing cigar list. 00619 // 00620 // XXX we no longer need the CigarRoller += methods. 00621 // 00622 // In this case, the Smith Waterman array H was created from 00623 // the read and reference in the reverse direction. 00624 // 00625 void rollCigarBackward(CigarRoller &cigar) 00626 { 00627 vector<pair<int,int> >::iterator i; 00628 00629 // if there is soft clipping, allow for it (::Add will 00630 // ignore if the count is 0): 00631 cigar.Add(CigarRoller::softClip, getSoftClipCount()); 00632 00633 i = alignment.begin(); 00634 00635 for (i=alignment.begin(); 00636 i < alignment.end() - 1; 00637 i++) 00638 { 00639 #if defined(DEBUG_CIGAR) 00640 cout << *i << ": "; 00641 #endif 00642 if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second - 1)) 00643 { 00644 // match/mismatch 00645 #if defined(DEBUG_CIGAR) 00646 cout << "Match/Mismatch"; 00647 #endif 00648 cigar.Add(CigarRoller::match, 1); 00649 } 00650 else if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second)) 00651 { 00652 // insert? 00653 #if defined(DEBUG_CIGAR) 00654 cout << "Insert?"; 00655 #endif 00656 cigar.Add(CigarRoller::insert, 1); 00657 } 00658 else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second - 1)) 00659 { 00660 // delete? 00661 #if defined(DEBUG_CIGAR) 00662 cout << "Delete?"; 00663 #endif 00664 cigar.Add(CigarRoller::del, 1); 00665 } 00666 } 00667 #if defined(DEBUG_CIGAR) 00668 cout << endl; 00669 #endif 00670 } 00671 00672 // 00673 // Given the direction, and the alignment vector, obtain 00674 // the soft clip (the mismatches at the end of the string which 00675 // can in Smith Waterman matching be considered as a separate case). 00676 // 00677 // NB: be careful that the backward case is correct - it passes 00678 // all of two built in tests, but it may not be generally correct. 00679 // 00680 int getSoftClipCount() 00681 { 00682 if (direction>0) 00683 { 00684 // invariant: assert(maxCostPosition == alignment.front()); 00685 return m - maxCostPosition.first; 00686 } 00687 else 00688 { 00689 // return alignment.back().first; // nope, this always returns 0 00690 // XXX BE CAREFUL... not sure this is right, either. 00691 // return n - maxCostPosition.second; 00692 return m - maxCostPosition.first; 00693 } 00694 } 00695 00696 void rollCigar(CigarRoller &cigar) 00697 { 00698 if (direction>0) rollCigarForward(cigar); 00699 else rollCigarBackward(cigar); 00700 } 00701 00702 // 00703 // all in one local alignment: 00704 // 00705 // Steps: 00706 // 1 - do internal setup 00707 // 2 - populate H 00708 // 3 - create alignment vector (this chooses the best path) 00709 // 4 - compute sumQ 00710 // 5 - compute the cigar string 00711 // 6 - compute and update the softclip for the read 00712 // 00713 bool localAlignment( 00714 uint32_t bandSize, 00715 Atype &read, 00716 readIndexType readLength, 00717 QualityType &quality, 00718 Btype &reference, 00719 referenceIndexType referenceLength, 00720 referenceIndexType referenceOffset, 00721 CigarRoller &cigarRoller, 00722 uint32_t &softClipCount, 00723 referenceIndexType &cigarStartingPoint, 00724 int &sumQ 00725 ) 00726 { 00727 00728 clear(); 00729 00730 cigarRoller.clear(); 00731 00732 setDirection(+1); 00733 setAllowedInsertDelete(bandSize); 00734 00735 setRead(&read); 00736 setReadOffset(0); 00737 setReadLength(readLength); 00738 00739 setReadQuality(&quality); 00740 00741 setReference(&reference); 00742 setReferenceOffset(referenceOffset); 00743 setReferenceLength(referenceLength); 00744 00745 populateH(); 00746 00747 softClipCount = getSoftClipCount(); 00748 00749 populateAlignment(); 00750 00751 rollCigar(cigarRoller); 00752 00753 sumQ = getSumQ(); 00754 00755 return false; 00756 00757 }; 00758 00759 }; 00760 00761 #endif