libStatGen Software
1
|
Public Member Functions | |
GreedyTupleAligner (Weight &wt) | |
int | MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch) |
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'. | |
void | Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition) |
Core local alignment algorithm. |
Definition at line 60 of file GreedyTupleAligner.h.
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align | ( | QueryType | query, |
int | queryLength, | ||
ReferenceType | reference, | ||
ReferenceIndex | searchStartIndex, | ||
int | searchSize, | ||
CigarRoller & | cigarRoller, | ||
ReferenceIndex & | matchPosition | ||
) | [inline] |
Core local alignment algorithm.
query | input query |
queryLength | length of query |
reference | reference genome |
searchStartIndex | matching starts here |
searchSize | how far we will search |
cigarRoller | store alignment results here |
matchPosition | store match position |
Definition at line 159 of file GreedyTupleAligner.h.
References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.
{ // Algorithm: // finished align? (should we try different align position?) // if not, try next tuple // is the tuple aligned? // yes, extend to previous, mark unmatched part mismatch or gap // extend to next matched part int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used int queryMatchCount = 0; // query matched # of bases int q1 = 0; // to align int pos = -1; int lastR1 = -1; // index: record last cigarRoller.clear(); matchPosition = -1; while (queryMatchCount < queryLength) { if (r1 == searchSize - 1) // touched ref right boundary { cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount); break; } if (queryLength - q1 < tupleSize) { // XXX this needs to do something more sane // printf("some bases left!\n"); // a simple fix: treat all left-over bases as mismatches/matches cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount); break; } int mismatch = 0; int matchedLen = 0; if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple >= 0) { // found match position for tuple if (lastR1<0) matchPosition = pos; // // deal with left // if (lastR1>=0) // have previously aligned part of the query to the reference genome yet { if (pos > 0) { cigarRoller.Add(CigarRoller::del, pos); } } else { lastR1 = pos; } r1 += pos; r1 += matchedLen; q1 += matchedLen; // // deal with right // cigarRoller.Add(CigarRoller::match, matchedLen); queryMatchCount = q1; lastR1 = r1; continue; } // end if // // try insertion // maximum insert ? say 2 // for (int i = 1; i < queryLength - q1 - tupleSize; i++) { int mismatch = 0; int matchedLen = 0; // check reference genome broundary if (searchStartIndex + r1 >= reference.getNumberBases()) return; if ((pos = MatchTuple(query+q1 + i , queryLength - q1 -i , reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple >= 0) { if (matchPosition < 0) matchPosition = pos + q1 + i ; } queryMatchCount += i; q1 += i; cigarRoller.Add(CigarRoller::insert, i); lastR1 = r1 + pos; r1 += pos + tupleSize; q1 += tupleSize; // deal with right while (searchStartIndex + r1 < reference.getNumberBases() && query[q1]==reference[searchStartIndex + r1] && q1 < queryLength) { r1++; q1++; } if (q1 < queryLength) { cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount); queryMatchCount = q1; } else { cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount); queryMatchCount = queryLength ; break ; } } r1++; q1++; // try next } // end while (queryMatchCount < queryLength) }
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple | ( | const QueryType | query, |
const int | queryLength, | ||
const ReferenceType | reference, | ||
const ReferenceIndex | searchStartIndex, | ||
const int | searchSize, | ||
int & | matchedLength, | ||
int & | mismatch | ||
) | [inline] |
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
query | input query |
queryLength | length of query |
reference | reference sequence |
searchStartIndex | the positino where search starts |
searchSize | the total length in reference sequence that will be examine |
matchedLength | store how many bases are matched |
mismatch | store how many bases are mismatched |
Definition at line 79 of file GreedyTupleAligner.h.
Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().
{ // now use naive search, // TODO: will incorportate KMP serach later // TODO: adjust tolerance of mismatches const int MAX_MISMATCH=2; int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1; #if defined(DEBUG_GREEDY_ALIGNER) cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl; #endif // here i is the matching position (inclusive) // j is the matched length for (int i = 0; i <= searchSize - tupleSize; i++) { int j = 0; mismatch = 0; while (j < queryLength) { if (searchStartIndex + i + j >= reference.getNumberBases()) break; if (query[j] != reference[searchStartIndex + i + j]) { mismatch++; if (mismatch >= MAX_MISMATCH) break; } j++; } if (j>0 && (j==queryLength)) j--; while (searchStartIndex +i +j < reference.getNumberBases() && ((j+1) > mismatch) && mismatch>0 && query[j] != reference[searchStartIndex + i+j]) { // if pattern matching goes beyong the preset mismatch cutoff, // we will have to go backwards j--; mismatch--; } int score = j - mismatch; if (score > bestScore) { bestPos = i; bestScore = score; bestMismatch = mismatch; bestMatchedLength = j+1; } } if (bestScore > 0) { mismatch = bestMismatch; matchedLength = bestMatchedLength; return bestPos; } return -1; }