GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex > Class Template Reference

Collaboration diagram for GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 GreedyTupleAligner (Weight &wt)
int MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
 Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
void Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition)
 Core local alignment algorithm.

Detailed Description

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
class GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >

Definition at line 60 of file GreedyTupleAligner.h.


Member Function Documentation

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align ( QueryType  query,
int  queryLength,
ReferenceType  reference,
ReferenceIndex  searchStartIndex,
int  searchSize,
CigarRoller cigarRoller,
ReferenceIndex &  matchPosition 
) [inline]

Core local alignment algorithm.

Parameters:
query input query
queryLength length of query
reference reference genome
searchStartIndex matching starts here
searchSize how far we will search
cigarRoller store alignment results here
matchPosition store match position

Definition at line 159 of file GreedyTupleAligner.h.

References GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple().

00167     {
00168         // Algorithm:
00169         // finished align? (should we try different align position?)
00170         // if not, try next tuple
00171         //    is the tuple aligned?
00172         //    yes, extend to previous, mark unmatched part mismatch or gap
00173         //         extend to next matched part
00174         int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used
00175         int queryMatchCount = 0; // query matched # of bases
00176         int q1 = 0; // to align
00177         int pos = -1;
00178         int lastR1 = -1; // index: record last
00179 
00180         cigarRoller.clear();
00181         matchPosition = -1;
00182 
00183         while (queryMatchCount < queryLength)
00184         {
00185             if (r1 == searchSize - 1)   // touched ref right boundary
00186             {
00187                 cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
00188                 break;
00189             }
00190             if (queryLength - q1 < tupleSize)
00191             {
00192                 // XXX this needs to do something more sane
00193                 // printf("some bases left!\n");
00194                 // a simple fix: treat all left-over bases as mismatches/matches
00195                 cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
00196                 break;
00197             }
00198             int mismatch = 0;
00199             int matchedLen = 0;
00200             if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple
00201 
00202                     >= 0)
00203             {
00204                 // found match position for tuple
00205 
00206                 if (lastR1<0)
00207                     matchPosition = pos;
00208 
00209                 //
00210                 // deal with left
00211                 //
00212                 if (lastR1>=0)   // have previously aligned part of the query to the reference genome yet
00213                 {
00214                     if (pos > 0)
00215                     {
00216                         cigarRoller.Add(CigarRoller::del, pos);
00217                     }
00218                 }
00219                 else
00220                 {
00221                     lastR1 = pos;
00222                 }
00223 
00224                 r1 += pos;
00225                 r1 += matchedLen;
00226                 q1 += matchedLen;
00227 
00228                 //
00229                 // deal with right
00230                 //
00231                 cigarRoller.Add(CigarRoller::match, matchedLen);
00232                 queryMatchCount = q1;
00233                 lastR1 = r1;
00234 
00235                 continue;
00236             } // end if
00237 
00238             //
00239             // try insertion
00240             // maximum insert ? say 2
00241             //
00242             for (int i = 1; i < queryLength - q1 - tupleSize; i++)
00243             {
00244                 int mismatch = 0;
00245                 int matchedLen = 0;
00246                 // check reference genome broundary
00247                 if (searchStartIndex + r1 >= reference.getNumberBases())
00248                     return;
00249                 if ((pos = MatchTuple(query+q1 + i ,
00250                                       queryLength - q1 -i ,
00251                                       reference,
00252                                       searchStartIndex + r1,
00253                                       searchSize - r1,
00254                                       matchedLen,
00255                                       mismatch)) // found match position for tuple
00256                         >= 0)
00257                 {
00258                     if (matchPosition < 0)
00259                         matchPosition = pos + q1 + i ;
00260                 }
00261                 queryMatchCount += i;
00262                 q1 += i;
00263                 cigarRoller.Add(CigarRoller::insert, i);
00264 
00265                 lastR1 = r1 + pos;
00266                 r1 += pos + tupleSize;
00267                 q1 += tupleSize;
00268 
00269                 // deal with right
00270                 while (searchStartIndex + r1 < reference.getNumberBases()
00271                         && query[q1]==reference[searchStartIndex + r1]
00272                         && q1 < queryLength)
00273                 {
00274                     r1++;
00275                     q1++;
00276                 }
00277                 if (q1 < queryLength)
00278                 {
00279                     cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
00280                     queryMatchCount = q1;
00281                 }
00282                 else
00283                 {
00284                     cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
00285                     queryMatchCount = queryLength ;
00286                     break ;
00287                 }
00288             }
00289 
00290             r1++;
00291             q1++;
00292 
00293             // try next
00294         } // end while (queryMatchCount < queryLength)
00295     }

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple ( const QueryType  query,
const int  queryLength,
const ReferenceType  reference,
const ReferenceIndex  searchStartIndex,
const int  searchSize,
int &  matchedLength,
int &  mismatch 
) [inline]

Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.

Parameters:
query input query
queryLength length of query
reference reference sequence
searchStartIndex the positino where search starts
searchSize the total length in reference sequence that will be examine
matchedLength store how many bases are matched
mismatch store how many bases are mismatched
Returns:
-1 for unsuccess return

Definition at line 79 of file GreedyTupleAligner.h.

Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().

00087     {
00088         // now use naive search,
00089         // TODO: will incorportate KMP serach later
00090         // TODO: adjust tolerance of mismatches
00091         const int MAX_MISMATCH=2;
00092         int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
00093 
00094 #if defined(DEBUG_GREEDY_ALIGNER)
00095         cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
00096 #endif
00097         // here i is the matching position (inclusive)
00098         // j is the matched length
00099         for (int i = 0; i <= searchSize - tupleSize; i++)
00100         {
00101             int j = 0;
00102             mismatch = 0;
00103             while (j < queryLength)
00104             {
00105                 if (searchStartIndex + i + j >= reference.getNumberBases())
00106                     break;
00107                 if (query[j] != reference[searchStartIndex + i + j])
00108                 {
00109                     mismatch++;
00110                     if (mismatch >= MAX_MISMATCH)
00111                         break;
00112                 }
00113                 j++;
00114             }
00115 
00116             if (j>0 && (j==queryLength)) j--;
00117 
00118             while (searchStartIndex +i +j < reference.getNumberBases()
00119                     && ((j+1) > mismatch)
00120                     && mismatch>0
00121                     && query[j] != reference[searchStartIndex + i+j])
00122             {
00123                 // if pattern matching goes beyong the preset mismatch cutoff,
00124                 // we will have to go backwards
00125                 j--;
00126                 mismatch--;
00127             }
00128 
00129             int score = j - mismatch;
00130 
00131             if (score > bestScore)
00132             {
00133                 bestPos = i;
00134                 bestScore = score;
00135                 bestMismatch = mismatch;
00136                 bestMatchedLength = j+1;
00137             }
00138         }
00139 
00140         if (bestScore > 0)
00141         {
00142             mismatch = bestMismatch;
00143             matchedLength = bestMatchedLength;
00144             return bestPos;
00145         }
00146         return -1;
00147     }


The documentation for this class was generated from the following file:
Generated on Wed Nov 17 15:38:33 2010 for StatGen Software by  doxygen 1.6.3