Public Member Functions | |
GreedyTupleAligner (Weight &wt) | |
int | MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch) |
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'. | |
void | Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition) |
Core local alignment algorithm. |
Definition at line 60 of file GreedyTupleAligner.h.
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align | ( | QueryType | query, | |
int | queryLength, | |||
ReferenceType | reference, | |||
ReferenceIndex | searchStartIndex, | |||
int | searchSize, | |||
CigarRoller & | cigarRoller, | |||
ReferenceIndex & | matchPosition | |||
) | [inline] |
Core local alignment algorithm.
query | input query | |
queryLength | length of query | |
reference | reference genome | |
searchStartIndex | matching starts here | |
searchSize | how far we will search | |
cigarRoller | store alignment results here | |
matchPosition | store match position |
Definition at line 159 of file GreedyTupleAligner.h.
References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.
00167 { 00168 // Algorithm: 00169 // finished align? (should we try different align position?) 00170 // if not, try next tuple 00171 // is the tuple aligned? 00172 // yes, extend to previous, mark unmatched part mismatch or gap 00173 // extend to next matched part 00174 int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used 00175 int queryMatchCount = 0; // query matched # of bases 00176 int q1 = 0; // to align 00177 int pos = -1; 00178 int lastR1 = -1; // index: record last 00179 00180 cigarRoller.clear(); 00181 matchPosition = -1; 00182 00183 while (queryMatchCount < queryLength) 00184 { 00185 if (r1 == searchSize - 1) // touched ref right boundary 00186 { 00187 cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount); 00188 break; 00189 } 00190 if (queryLength - q1 < tupleSize) 00191 { 00192 // XXX this needs to do something more sane 00193 // printf("some bases left!\n"); 00194 // a simple fix: treat all left-over bases as mismatches/matches 00195 cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount); 00196 break; 00197 } 00198 int mismatch = 0; 00199 int matchedLen = 0; 00200 if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple 00201 00202 >= 0) 00203 { 00204 // found match position for tuple 00205 00206 if (lastR1<0) 00207 matchPosition = pos; 00208 00209 // 00210 // deal with left 00211 // 00212 if (lastR1>=0) // have previously aligned part of the query to the reference genome yet 00213 { 00214 if (pos > 0) 00215 { 00216 cigarRoller.Add(CigarRoller::del, pos); 00217 } 00218 } 00219 else 00220 { 00221 lastR1 = pos; 00222 } 00223 00224 r1 += pos; 00225 r1 += matchedLen; 00226 q1 += matchedLen; 00227 00228 // 00229 // deal with right 00230 // 00231 cigarRoller.Add(CigarRoller::match, matchedLen); 00232 queryMatchCount = q1; 00233 lastR1 = r1; 00234 00235 continue; 00236 } // end if 00237 00238 // 00239 // try insertion 00240 // maximum insert ? say 2 00241 // 00242 for (int i = 1; i < queryLength - q1 - tupleSize; i++) 00243 { 00244 int mismatch = 0; 00245 int matchedLen = 0; 00246 // check reference genome broundary 00247 if (searchStartIndex + r1 >= reference.getNumberBases()) 00248 return; 00249 if ((pos = MatchTuple(query+q1 + i , 00250 queryLength - q1 -i , 00251 reference, 00252 searchStartIndex + r1, 00253 searchSize - r1, 00254 matchedLen, 00255 mismatch)) // found match position for tuple 00256 >= 0) 00257 { 00258 if (matchPosition < 0) 00259 matchPosition = pos + q1 + i ; 00260 } 00261 queryMatchCount += i; 00262 q1 += i; 00263 cigarRoller.Add(CigarRoller::insert, i); 00264 00265 lastR1 = r1 + pos; 00266 r1 += pos + tupleSize; 00267 q1 += tupleSize; 00268 00269 // deal with right 00270 while (searchStartIndex + r1 < reference.getNumberBases() 00271 && query[q1]==reference[searchStartIndex + r1] 00272 && q1 < queryLength) 00273 { 00274 r1++; 00275 q1++; 00276 } 00277 if (q1 < queryLength) 00278 { 00279 cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount); 00280 queryMatchCount = q1; 00281 } 00282 else 00283 { 00284 cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount); 00285 queryMatchCount = queryLength ; 00286 break ; 00287 } 00288 } 00289 00290 r1++; 00291 q1++; 00292 00293 // try next 00294 } // end while (queryMatchCount < queryLength) 00295 }
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple | ( | const QueryType | query, | |
const int | queryLength, | |||
const ReferenceType | reference, | |||
const ReferenceIndex | searchStartIndex, | |||
const int | searchSize, | |||
int & | matchedLength, | |||
int & | mismatch | |||
) | [inline] |
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
query | input query | |
queryLength | length of query | |
reference | reference sequence | |
searchStartIndex | the positino where search starts | |
searchSize | the total length in reference sequence that will be examine | |
matchedLength | store how many bases are matched | |
mismatch | store how many bases are mismatched |
Definition at line 79 of file GreedyTupleAligner.h.
Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().
00087 { 00088 // now use naive search, 00089 // TODO: will incorportate KMP serach later 00090 // TODO: adjust tolerance of mismatches 00091 const int MAX_MISMATCH=2; 00092 int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1; 00093 00094 #if defined(DEBUG_GREEDY_ALIGNER) 00095 cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl; 00096 #endif 00097 // here i is the matching position (inclusive) 00098 // j is the matched length 00099 for (int i = 0; i <= searchSize - tupleSize; i++) 00100 { 00101 int j = 0; 00102 mismatch = 0; 00103 while (j < queryLength) 00104 { 00105 if (searchStartIndex + i + j >= reference.getNumberBases()) 00106 break; 00107 if (query[j] != reference[searchStartIndex + i + j]) 00108 { 00109 mismatch++; 00110 if (mismatch >= MAX_MISMATCH) 00111 break; 00112 } 00113 j++; 00114 } 00115 00116 if (j>0 && (j==queryLength)) j--; 00117 00118 while (searchStartIndex +i +j < reference.getNumberBases() 00119 && ((j+1) > mismatch) 00120 && mismatch>0 00121 && query[j] != reference[searchStartIndex + i+j]) 00122 { 00123 // if pattern matching goes beyong the preset mismatch cutoff, 00124 // we will have to go backwards 00125 j--; 00126 mismatch--; 00127 } 00128 00129 int score = j - mismatch; 00130 00131 if (score > bestScore) 00132 { 00133 bestPos = i; 00134 bestScore = score; 00135 bestMismatch = mismatch; 00136 bestMatchedLength = j+1; 00137 } 00138 } 00139 00140 if (bestScore > 0) 00141 { 00142 mismatch = bestMismatch; 00143 matchedLength = bestMatchedLength; 00144 return bestPos; 00145 } 00146 return -1; 00147 }