GreedyTupleAligner.h
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef _GREEDY_TUPLE_H
00019 #define _GREEDY_TUPLE_H
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <assert.h>
00025 #include <ctype.h>
00026 #include "Generic.h"
00027 #include "CigarRoller.h"
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 struct Weight
00041 {
00042 public:
00043 Weight()
00044 {
00045 gapOpen = gapExtend = -1;
00046 mismatch = -1;
00047 match= 2;
00048 };
00049 int gapOpen;
00050 int gapExtend;
00051 int mismatch;
00052 int match;
00053 };
00054
00055
00056
00057
00058
00059 template <typename QueryType, typename ReferenceType, typename ReferenceIndex>
00060 class GreedyTupleAligner
00061 {
00062 public:
00063 GreedyTupleAligner(Weight& wt): weight(wt)
00064 {}
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079 int MatchTuple(
00080 const QueryType query,
00081 const int queryLength,
00082 const ReferenceType reference,
00083 const ReferenceIndex searchStartIndex,
00084 const int searchSize,
00085 int& matchedLength,
00086 int& mismatch)
00087 {
00088
00089
00090
00091 const int MAX_MISMATCH=2;
00092 int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
00093
00094 #if defined(DEBUG_GREEDY_ALIGNER)
00095 cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
00096 #endif
00097
00098
00099 for (int i = 0; i <= searchSize - tupleSize; i++)
00100 {
00101 int j = 0;
00102 mismatch = 0;
00103 while (j < queryLength)
00104 {
00105 if (searchStartIndex + i + j >= reference.getNumberBases())
00106 break;
00107 if (query[j] != reference[searchStartIndex + i + j])
00108 {
00109 mismatch++;
00110 if (mismatch >= MAX_MISMATCH)
00111 break;
00112 }
00113 j++;
00114 }
00115
00116 if (j>0 && (j==queryLength)) j--;
00117
00118 while (searchStartIndex +i +j < reference.getNumberBases()
00119 && ((j+1) > mismatch)
00120 && mismatch>0
00121 && query[j] != reference[searchStartIndex + i+j])
00122 {
00123
00124
00125 j--;
00126 mismatch--;
00127 }
00128
00129 int score = j - mismatch;
00130
00131 if (score > bestScore)
00132 {
00133 bestPos = i;
00134 bestScore = score;
00135 bestMismatch = mismatch;
00136 bestMatchedLength = j+1;
00137 }
00138 }
00139
00140 if (bestScore > 0)
00141 {
00142 mismatch = bestMismatch;
00143 matchedLength = bestMatchedLength;
00144 return bestPos;
00145 }
00146 return -1;
00147 }
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 void Align(
00160 QueryType query,
00161 int queryLength,
00162 ReferenceType reference,
00163 ReferenceIndex searchStartIndex,
00164 int searchSize,
00165 CigarRoller& cigarRoller,
00166 ReferenceIndex& matchPosition)
00167 {
00168
00169
00170
00171
00172
00173
00174 int r1 = 0;
00175 int queryMatchCount = 0;
00176 int q1 = 0;
00177 int pos = -1;
00178 int lastR1 = -1;
00179
00180 cigarRoller.clear();
00181 matchPosition = -1;
00182
00183 while (queryMatchCount < queryLength)
00184 {
00185 if (r1 == searchSize - 1)
00186 {
00187 cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
00188 break;
00189 }
00190 if (queryLength - q1 < tupleSize)
00191 {
00192
00193
00194
00195 cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
00196 break;
00197 }
00198 int mismatch = 0;
00199 int matchedLen = 0;
00200 if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch))
00201
00202 >= 0)
00203 {
00204
00205
00206 if (lastR1<0)
00207 matchPosition = pos;
00208
00209
00210
00211
00212 if (lastR1>=0)
00213 {
00214 if (pos > 0)
00215 {
00216 cigarRoller.Add(CigarRoller::del, pos);
00217 }
00218 }
00219 else
00220 {
00221 lastR1 = pos;
00222 }
00223
00224 r1 += pos;
00225 r1 += matchedLen;
00226 q1 += matchedLen;
00227
00228
00229
00230
00231 cigarRoller.Add(CigarRoller::match, matchedLen);
00232 queryMatchCount = q1;
00233 lastR1 = r1;
00234
00235 continue;
00236 }
00237
00238
00239
00240
00241
00242 for (int i = 1; i < queryLength - q1 - tupleSize; i++)
00243 {
00244 int mismatch = 0;
00245 int matchedLen = 0;
00246
00247 if (searchStartIndex + r1 >= reference.getNumberBases())
00248 return;
00249 if ((pos = MatchTuple(query+q1 + i ,
00250 queryLength - q1 -i ,
00251 reference,
00252 searchStartIndex + r1,
00253 searchSize - r1,
00254 matchedLen,
00255 mismatch))
00256 >= 0)
00257 {
00258 if (matchPosition < 0)
00259 matchPosition = pos + q1 + i ;
00260 }
00261 queryMatchCount += i;
00262 q1 += i;
00263 cigarRoller.Add(CigarRoller::insert, i);
00264
00265 lastR1 = r1 + pos;
00266 r1 += pos + tupleSize;
00267 q1 += tupleSize;
00268
00269
00270 while (searchStartIndex + r1 < reference.getNumberBases()
00271 && query[q1]==reference[searchStartIndex + r1]
00272 && q1 < queryLength)
00273 {
00274 r1++;
00275 q1++;
00276 }
00277 if (q1 < queryLength)
00278 {
00279 cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
00280 queryMatchCount = q1;
00281 }
00282 else
00283 {
00284 cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
00285 queryMatchCount = queryLength ;
00286 break ;
00287 }
00288 }
00289
00290 r1++;
00291 q1++;
00292
00293
00294 }
00295 }
00296 private:
00297 static const int tupleSize = 3;
00298 Weight weight;
00299 };
00300
00301
00302 #endif