doxygen/current/SmithWaterman_8h_source.html

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017
00018 #if !defined(_SMITH_WATERMAN_)
00019 #define _SMITH_WATERMAN_
00020
00021 #include <string.h> // for inline use of strcat, etc
00022 #include <limits.h> // for INT_MAX
00023 #include <stdint.h> // for uint32_t and friends
00024
00025 //
00026 // This file implements a bi-directional, banded Smith Waterman matching
00027 // algorithm.
00028 //
00029 // The design is dictated by several observations:
00030 //
00031 //  - building the full matrix H for the read and reference is slow,
00032 //    so we perform it only for a band down the diagonal, thus speeding
00033 //    up the algorithm at the cost of reduced detection of insert/deletes.
00034 //
00035 //  - we must minimize data copying
00036 //
00037 //  - we must have the ability to test the algorithm quickly and simply,
00038 //    hence we implement the class as a template so that this file doesn't
00039 //    have to depend on Karma's GenomeSequence object, which is a relatively
00040 //    heavy weight object to do testing against.
00041 //
00042 //  - because Karma uses index words to determine match candidate locations
00043 //    across the genome, and because we use the banded Smith Waterman approach,
00044 //    we must provide bi-directional Smith Waterman matching.  See example below.
00045 //
00046 // To fully understand the examples below, make sure you understand
00047 // Phred quality scores, CIGAR strings, and pattern matching.
00048 //
00049 // Simple Functional Examples:
00050 //
00051 //   Given a read, a read quality and a reference, we want to obtain some
00052 //   measure of how well that read matches the reference at that given location.
00053 //   So, we have:
00054 //          Read: "ATCG"
00055 //       Quality: "$$$$" (Phred scores - all are decimal 3)
00056 //     Reference: "ATCG"
00057 //     We expect: sumQ = 0, and Cigar "4M"
00058 //
00059 // Complex Functional Examples:
00060 //          Read: "AATCG"
00061 //       Quality: "$$$$$" (Phred scores - all are decimal 3)
00062 //     Reference: "ATCG"
00063 //     We expect: sumQ = 3, and Cigar "1M1I3M"
00064 //
00065 // Backwards matching:
00066 // It is harder for me to construct a clear example, so imagine a read with
00067 // cumulative inserts or deletes that sum up to a number larger than the
00068 // width of the band along the diagonal.  If we perform SW in the 'wrong'
00069 // direction, we will lose the read entirely, whereas if we start from the
00070 // end that is known to be matching, we may obtain a good match for the bulk
00071 // of the read.
00072 //
00073 // For example, you could imagine a read where the first 10 bases had a mess
00074 // of inserts, but then was clean for the next 100 bases.  You'd want it
00075 // to match as many of the good bases as practical, even if you knew you were
00076 // losing information at the end.
00077 //
00078
00079 #include <assert.h>
00080 #include <stdio.h>
00081 #include <stdlib.h>
00082 #include <string>
00083 #include <iostream>
00084 #include <iomanip>
00085 #include <utility>
00086 #include <vector>
00087
00088 #include "CigarRoller.h"
00089 #include "Generic.h"
00090
00091 using std::cout;
00092 using std::cin;
00093 using std::cerr;
00094 using std::setw;
00095 using std::endl;
00096 using std::pair;
00097 using std::vector;
00098
00099 #if !defined(MAX)
00100 #define MAX(x,y) ((x)>(y) ? (x) : (y))
00101 #endif
00102 #if !defined(MIN)
00103 #define MIN(x,y) ((x)<(y) ? (x) : (y))
00104 #endif
00105
00106 //
00107 // Implement the core of Smith Waterman as described in:
00108 // http://en.wikipedia.org/wiki/Smith_waterman
00109 //
00110 // The only variation from the basic SW algorithm is the
00111 // use of a banded approach - to limit the algorithm to
00112 // a band along the diagonal.  This limits the maximum
00113 // additive number of indels, but allows an O(c*M) approach,
00114 // where c is the constant max number of indels allowed.
00115 //
00116 // This is implemented as function templates because for testing, it is easier
00117 // to use character arrays.  In our mapper, we will be using a
00118 // combination of a String object for the read and the genome object
00119 // as the reference.  Both can be indexed, and give a character (base)
00120 // value, but the code would be duplicated if we implement SW for
00121 // each type of argument.
00122 //
00123 // Htype -> the type of the array H cell (8 or 16 bit unsigned int)
00124 // Atype -> the read string type (must have Atype::operator [] defined)
00125 //
00126 template <int maxReadLengthH, int maxReferenceLengthH, typename HCellType, typename Atype, typename Btype, typename QualityType, typename readIndexType, typename referenceIndexType>
00127 class SmithWaterman
00128 {
00129 public:
00130
00131     //
00132     // XXX in theory, this weight should be sensitive
00133     // to the quality of the base, and should have
00134     // an appropriate cost for an indel, as well.
00135     //
00136     // I think we need to get rid of this, since it is
00137     // basically wrong for our needs.
00138     //
00139     struct weight
00140     {
00141         weight()
00142         {
00143             match=2;
00144             misMatch=-1;
00145             insert=-1;
00146             del=-1;
00147         };
00148
00149         int match;
00150         int misMatch;
00151         int insert;
00152         int del;
00153     };
00154
00155     HCellType   H[maxReadLengthH][maxReferenceLengthH];
00156     Atype   *A;
00157     Btype   *B;
00158     QualityType *qualities;
00159
00160     int     m,n;
00161     readIndexType MOffset; // constant offset to m (read)
00162     referenceIndexType NOffset; // constant offset to n (reference)
00163     weight  w;
00164     int     allowedInsertDelete;
00165     int     direction;
00166     int     gapOpenCount;
00167     int     gapCloseCount;
00168     int     gapExtendCount;
00169     vector<pair<int,int> > alignment;
00170     void clearAlignment()
00171     {
00172         alignment.clear();
00173     }
00174
00175     HCellType maxCostValue;    // max Cost value in H
00176     pair<int,int>   maxCostPosition;    // row/col of max cost value in H
00177
00178     //
00179     // Clear the member variables only.
00180     // To clear H, call clearH().
00181     //
00182     // In theory, clear() plus set() should be sufficient to
00183     // get a clean run, but I haven't tested this extensively.
00184     //
00185     void clear()
00186     {
00187         maxCostPosition.first = 0;
00188         maxCostPosition.second = 0;
00189         A = NULL;
00190         B = NULL;
00191         qualities = NULL;
00192         m = 0;
00193         n = 0;
00194         MOffset = 0;
00195         NOffset = 0;
00196         allowedInsertDelete = 0;
00197         direction = 0;
00198         gapOpenCount = 0;
00199         gapCloseCount = 0;
00200         gapExtendCount = 0;
00201     }
00202
00203     // caller will be using set* methods to set everything up.
00204     SmithWaterman()
00205     {
00206         clear();
00207         clearH();
00208     }
00209
00210     // construct with everything and the kitchen sink:
00211     SmithWaterman(
00212         Atype *A,
00213         QualityType *qualities,
00214         Btype *B,
00215         int m,
00216         int n,
00217         int allowedInsertDelete = INT_MAX,
00218         int direction = 1,
00219         readIndexType MOffset = 0,
00220         referenceIndexType NOffset = 0):
00221             A(A),
00222             qualities(qualities),
00223             B(B),
00224             m(m),
00225             n(n),
00226             allowedInsertDelete(allowedInsertDelete),
00227             direction(direction),
00228             MOffset(MOffset),
00229             NOffset(NOffset),
00230             maxCostValue((HCellType) 0)
00231     {
00232     }
00233
00234     void setRead(Atype *A)
00235     {
00236         this->A = A;
00237     }
00238     void setReadQuality(QualityType *qualities)
00239     {
00240         this->qualities = qualities;
00241     }
00242     void setReference(Btype *B)
00243     {
00244         this->B = B;
00245     }
00246
00247     // Caller may wish to index into the read to do the matching against
00248     // only part of the read.
00249     // NB: the quality length and offset are the same as the read.
00250     void setReadLength(int m)
00251     {
00252         this->m = m;
00253     }
00254     void setReadOffset(readIndexType MOffset)
00255     {
00256         this->MOffset = MOffset;
00257     }
00258
00259     // The reference is typically long, and not necessarily a char *,
00260     // so we provide an offset here.  If it were always a char *,
00261     // we'd just modify the caller to point directly at the reference
00262     // location.
00263     void setReferenceLength(int n)
00264     {
00265         this->n = n;
00266     }
00267     void setReferenceOffset(referenceIndexType NOffset)
00268     {
00269         this->NOffset = NOffset;
00270     }
00271
00272     //
00273     // Configuration: how wide is the band on the diagonal?
00274     // We should keep this small -- 1, 2, 3 or similar.  If
00275     // the value is default (INT_MAX), then the full matrix
00276     // will be built, which is fine, but quite slow.
00277     //
00278     // If this paramater is made smaller than when a previous
00279     // call to populateH was made, clearH will also need to be called.
00280     //
00281     void setAllowedInsertDelete(int allowedInsertDelete = INT_MAX)
00282     {
00283         this->allowedInsertDelete = allowedInsertDelete;
00284     }
00285
00286     //
00287     // Configuration: which end do we begin performing SW matching
00288     // from?  We need this because of index 'anchors' in the karma
00289     // matcher.
00290     void setDirection(int direction)
00291     {
00292         this->direction = direction;
00293     }
00294
00295     void clearH()
00296     {
00297         memset(H, 0, sizeof(H));
00298     }
00299
00300     void populateH()
00301     {
00302
00303         maxCostValue = 0;
00304
00305         for (int i=1; i<=m ; i++)
00306         {
00307
00308             // implement a banded Smith-Waterman approach:
00309             int low = MAX(1, i - allowedInsertDelete);
00310             int high = MIN(n, i + allowedInsertDelete);
00311
00312             for (int j=low; j<=high ; j++)
00313             {
00314                 HCellType c;
00315                 c = 0;
00316                 if (direction>0) c = MAX(c, H[i-1][j-1] + (((*A)[MOffset + i-1]==(*B)[NOffset + j-1]) ? w.match : w.misMatch));
00317                 else c = MAX(c, H[i-1][j-1] + (((*A)[MOffset + m-i+0]==(*B)[NOffset + n-j+0]) ? w.match : w.misMatch));
00318                 c = MAX(c, H[i-1][j] + w.del);
00319                 c = MAX(c, H[i][j-1] + w.insert);
00320                 H[i][j] = c;
00321                 if (c>maxCostValue)
00322                 {
00323                     maxCostValue = c;
00324                     maxCostPosition.first = i;
00325                     maxCostPosition.second = j;
00326                 }
00327             }
00328         }
00329     }
00330
00331 //
00332 // Given the matrix H as filled in by above routine, print it out.
00333 //
00334     void printH(bool prettyPrint = true)
00335     {
00336         // print the scoring matrix:
00337         for (int i=-1; i<=m ; i++)
00338         {
00339             for (int j=-1; j<=n ; j++)
00340             {
00341                 if (prettyPrint) cout << setw(3);
00342                 if (i==-1 && j==-1)
00343                 {
00344                     if (prettyPrint) cout << " ";
00345                     else cout << "\t";
00346                 }
00347                 else if (j==-1)
00348                 {
00349                     if (!prettyPrint) cout << "\t";
00350                     if (i==0) cout << "-";
00351                     else cout << (*A)[MOffset + direction>0 ? i-1 : m - i];
00352                 }
00353                 else if (i==-1)
00354                 {
00355                     if (!prettyPrint) cout << "\t";
00356                     if (j==0) cout << "-";
00357                     else cout << (*B)[NOffset + direction>0 ? j-1 : n - j];
00358                 }
00359                 else
00360                 {
00361                     if (!prettyPrint) cout << "\t";
00362                     cout << H[i][j];
00363                 }
00364             }
00365             cout << endl;
00366         }
00367     }
00368
00369     void debugPrint(bool doPrintH = true)
00370     {
00371         if (doPrintH) printH();
00372         cout << "maxCostPosition = " << maxCostPosition << std::endl;
00373         if (alignment.empty()) cout << "alignment vector is empty.\n";
00374         else
00375         {
00376             cout << "alignment vector:\n";
00377             for (vector<pair<int,int> >::iterator i=alignment.begin(); i < alignment.end(); i++)
00378             {
00379                 cout << (i - alignment.begin()) << ": " << *i << "\n";
00380             }
00381         }
00382         cout << std::endl;
00383     }
00384
00385 //
00386 // Given the Matrix H as filled in by populateH, fill in the
00387 // alignment vector with the indeces of the optimal match.
00388 //
00389     void populateAlignment()
00390     {
00391         alignment.clear();
00392         int i = m, j = n;
00393
00394         i = maxCostPosition.first;
00395         j = maxCostPosition.second;
00396
00397         //
00398         // Stop when we either reach zero cost cell or
00399         // when we reach the upper left corner of H.
00400         // A zero cost cell to the lower right means we
00401         // are soft clipping that end.
00402         //
00403         while (H[i][j] > 0 || (i>0 && j>0))
00404         {
00405 // #define DEBUG_ALIGNMENT_VECTOR
00406 #if defined(DEBUG_ALIGNMENT_VECTOR)
00407             cout << "alignment.push_back(" << i << ", " << j << ")" << endl;
00408 #endif
00409             alignment.push_back(pair<int,int>(i,j));
00410             if (H[i-1][j-1]>=H[i-1][j] && H[i-1][j-1]>=H[i][j-1])
00411             {
00412                 // diagonal upper left cell is biggest
00413                 i--;
00414                 j--;
00415             }
00416             else if (H[i-1][j] < H[i][j-1])
00417             {
00418                 // upper cell is biggest
00419                 j--;
00420             }
00421             else
00422             {
00423                 // left cell is biggest
00424                 i--;
00425             }
00426         }
00427         alignment.push_back(pair<int,int>(i,j));
00428 #if defined(DEBUG_ALIGNMENT_VECTOR)
00429         cout << "alignment.push_back(" << i << ", " << j << ")" << endl;
00430         cout << "alignment.size(): " << alignment.size() << endl;
00431 #endif
00432     }
00433
00434     //
00435     // Compute the sumQ for a read that has been mapped using populateH().
00436     //
00437     // In the simplest case, the read lies on the diagonal of the
00438     // matrix H, which means it has only matches and mismatches:
00439     // no inserts or deletes.
00440     //
00441     // However, in general, it is possible to have 0 or more insert,
00442     // delete, mismatch and soft clipped bases in the read, so we
00443     // need to accomodate all of those variations.
00444     //
00445     // XXX finish this.
00446     //
00447
00448     int getSumQ()
00449     {
00450         if (direction>0) return getSumQForward();
00451         else return getSumQBackward();
00452     }
00453
00454     int getSumQForward()
00455     {
00456         int sumQ = 0;
00457         vector<pair<int,int> >::reverse_iterator i;
00458
00459         for (i=alignment.rbegin(); i < alignment.rend() - 1; i++)
00460         {
00461 // #define DEBUG_GETSUMQ
00462 #if defined(DEBUG_GETSUMQ)
00463             cout << *i << ": ";
00464 #endif
00465             if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second + 1))
00466             {
00467                 // match/mismatch
00468 #if defined(DEBUG_GETSUMQ)
00469                 cout << "Match/Mismatch";
00470 #endif
00471                 if ((*A)[MOffset + (*i).first] != (*B)[NOffset + (*i).second])
00472                     sumQ += (*qualities)[MOffset + (*i).first] - '!';
00473             }
00474             else if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second))
00475             {
00476                 // insert?
00477 #if defined(DEBUG_GETSUMQ)
00478                 cout << "Insert";
00479 #endif
00480                 sumQ += 50;
00481             }
00482             else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second + 1))
00483             {
00484                 // delete?
00485 #if defined(DEBUG_GETSUMQ)
00486                 cout << "Delete";
00487 #endif
00488                 sumQ += 50;
00489             }
00490         }
00491 #if defined(DEBUG_GETSUMQ)
00492         cout << endl;
00493 #endif
00494         return sumQ;
00495     }
00496
00497     int getSumQBackward()
00498     {
00499         int sumQ = 0;
00500         vector<pair<int,int> >::iterator i;
00501
00502         for (i=alignment.begin(); i < alignment.end() - 1; i++)
00503         {
00504 #if defined(DEBUG_GETSUMQ)
00505             cout << *i << ": ";
00506 #endif
00507             if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second - 1))
00508             {
00509                 // match/mismatch
00510 #if defined(DEBUG_GETSUMQ)
00511                 cout << "Match/Mismatch";
00512 #endif
00513                 if ((*A)[MOffset + m - (*i).first] != (*B)[NOffset + n - (*i).second])
00514                     sumQ += (*qualities)[MOffset + m - (*i).first] - '!';
00515             }
00516             else if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second))
00517             {
00518                 // insert?
00519 #if defined(DEBUG_GETSUMQ)
00520                 cout << "Insert?";
00521 #endif
00522                 sumQ += 50;
00523             }
00524             else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second - 1))
00525             {
00526                 // delete?
00527 #if defined(DEBUG_GETSUMQ)
00528                 cout << "Delete?";
00529 #endif
00530                 sumQ += 50;
00531             }
00532         }
00533 #if defined(DEBUG_GETSUMQ)
00534         cout << endl;
00535 #endif
00536         return sumQ;
00537     }
00538
00539 #if 0
00540     int getSumQ()
00541     {
00542         vector<pair<int,int> >::reverse_iterator i;
00543         int sumQ = 0;
00544         for (i=alignment.rbegin(); i < alignment.rend() - 1; i++)
00545         {
00546 #if defined(DEBUG_ALIGNMENT_VECTOR)
00547             cout << "i: " << i - alignment.rbegin() << *i << endl;
00548 #endif
00549             // XXX NOT THIS SIMPLE - need to account for indels
00550             if (direction>0)
00551             {
00552                 if ((*A)[MOffset + (*i).first] != (*B)[NOffset + (*i).second])
00553                     sumQ += (*qualities)[MOffset + (*i).first] - '!';
00554             }
00555             else
00556             {
00557                 // m and n are sizes, first and second are 1 based offsets
00558                 if ((*A)[MOffset + m - (*i).first] != (*B)[NOffset + n - (*i).second])
00559                     sumQ += (*qualities)[MOffset + m - (*i).first] - '!';
00560             }
00561         }
00562         return sumQ;
00563     }
00564 #endif
00565
00566     //
00567     // Append cigar operations to an existing cigar list.
00568     //
00569     // XXX we no longer need the CigarRoller += methods.
00570     //
00571     // In this case, the Smith Waterman array H was created from
00572     // the read and reference in the forward direction.
00573     //
00574     void rollCigarForward(CigarRoller &cigar)
00575     {
00576         vector<pair<int,int> >::reverse_iterator i;
00577
00578         for (i=alignment.rbegin(); i < alignment.rend() - 1; i++)
00579         {
00580 // #define DEBUG_CIGAR
00581 #if defined(DEBUG_CIGAR)
00582             cout << *i << ": ";
00583 #endif
00584             if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second + 1))
00585             {
00586                 // match/mismatch
00587 #if defined(DEBUG_CIGAR)
00588                 cout << "Match/Mismatch";
00589 #endif
00590                 cigar.Add(CigarRoller::match, 1);
00591             }
00592             else if ((*(i+1)).first == ((*i).first+1) && (*(i+1)).second == ((*i).second))
00593             {
00594                 // insert?
00595 #if defined(DEBUG_CIGAR)
00596                 cout << "Insert";
00597 #endif
00598                 cigar.Add(CigarRoller::insert, 1);
00599             }
00600             else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second + 1))
00601             {
00602                 // delete?
00603 #if defined(DEBUG_CIGAR)
00604                 cout << "Delete";
00605 #endif
00606                 cigar.Add(CigarRoller::del, 1);
00607             }
00608         }
00609         // if there is soft clipping, allow for it (::Add will
00610         // ignore if the count is 0):
00611         cigar.Add(CigarRoller::softClip, getSoftClipCount());
00612 #if defined(DEBUG_CIGAR)
00613         cout << endl;
00614 #endif
00615     }
00616
00617     //
00618     // Append cigar operations to an existing cigar list.
00619     //
00620     // XXX we no longer need the CigarRoller += methods.
00621     //
00622     // In this case, the Smith Waterman array H was created from
00623     // the read and reference in the reverse direction.
00624     //
00625     void rollCigarBackward(CigarRoller &cigar)
00626     {
00627         vector<pair<int,int> >::iterator i;
00628
00629         // if there is soft clipping, allow for it (::Add will
00630         // ignore if the count is 0):
00631         cigar.Add(CigarRoller::softClip, getSoftClipCount());
00632
00633         i = alignment.begin();
00634
00635         for (i=alignment.begin();
00636                 i < alignment.end() - 1;
00637                 i++)
00638         {
00639 #if defined(DEBUG_CIGAR)
00640             cout << *i << ": ";
00641 #endif
00642             if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second - 1))
00643             {
00644                 // match/mismatch
00645 #if defined(DEBUG_CIGAR)
00646                 cout << "Match/Mismatch";
00647 #endif
00648                 cigar.Add(CigarRoller::match, 1);
00649             }
00650             else if ((*(i+1)).first == ((*i).first-1) && (*(i+1)).second == ((*i).second))
00651             {
00652                 // insert?
00653 #if defined(DEBUG_CIGAR)
00654                 cout << "Insert?";
00655 #endif
00656                 cigar.Add(CigarRoller::insert, 1);
00657             }
00658             else if ((*(i+1)).first == ((*i).first) && (*(i+1)).second == ((*i).second - 1))
00659             {
00660                 // delete?
00661 #if defined(DEBUG_CIGAR)
00662                 cout << "Delete?";
00663 #endif
00664                 cigar.Add(CigarRoller::del, 1);
00665             }
00666         }
00667 #if defined(DEBUG_CIGAR)
00668         cout << endl;
00669 #endif
00670     }
00671
00672     //
00673     // Given the direction, and the alignment vector, obtain
00674     // the soft clip (the mismatches at the end of the string which
00675     // can in Smith Waterman matching be considered as a separate case).
00676     //
00677     // NB: be careful that the backward case is correct - it passes
00678     // all of two built in tests, but it may not be generally correct.
00679     //
00680     int getSoftClipCount()
00681     {
00682         if (direction>0)
00683         {
00684             // invariant: assert(maxCostPosition == alignment.front());
00685             return m - maxCostPosition.first;
00686         }
00687         else
00688         {
00689 //          return alignment.back().first;  // nope, this always returns 0
00690             // XXX BE CAREFUL... not sure this is right, either.
00691 //          return n - maxCostPosition.second;
00692             return m - maxCostPosition.first;
00693         }
00694     }
00695
00696     void rollCigar(CigarRoller &cigar)
00697     {
00698         if (direction>0) rollCigarForward(cigar);
00699         else rollCigarBackward(cigar);
00700     }
00701
00702     //
00703     // all in one local alignment:
00704     //
00705     // Steps:
00706     //   1 - do internal setup
00707     //   2 - populate H
00708     //   3 - create alignment vector (this chooses the best path)
00709     //   4 - compute sumQ
00710     //   5 - compute the cigar string
00711     //   6 - compute and update the softclip for the read
00712     //
00713     bool localAlignment(
00714         uint32_t bandSize,
00715         Atype &read,
00716         readIndexType readLength,
00717         QualityType &quality,
00718         Btype &reference,
00719         referenceIndexType referenceLength,
00720         referenceIndexType referenceOffset,
00721         CigarRoller &cigarRoller,
00722         uint32_t &softClipCount,
00723         referenceIndexType &cigarStartingPoint,
00724         int             &sumQ
00725     )
00726     {
00727
00728         clear();
00729
00730         cigarRoller.clear();
00731
00732         setDirection(+1);
00733         setAllowedInsertDelete(bandSize);
00734
00735         setRead(&read);
00736         setReadOffset(0);
00737         setReadLength(readLength);
00738
00739         setReadQuality(&quality);
00740
00741         setReference(&reference);
00742         setReferenceOffset(referenceOffset);
00743         setReferenceLength(referenceLength);
00744
00745         populateH();
00746
00747         softClipCount = getSoftClipCount();
00748
00749         populateAlignment();
00750
00751         rollCigar(cigarRoller);
00752
00753         sumQ = getSumQ();
00754
00755         return false;
00756
00757     };
00758
00759 };
00760
00761 #endif