|
libStatGen Software
1
|
Public Member Functions | |
| GreedyTupleAligner (Weight &wt) | |
| int | MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch) |
| Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'. | |
| void | Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition) |
| Core local alignment algorithm. | |
Definition at line 60 of file GreedyTupleAligner.h.
| void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align | ( | QueryType | query, |
| int | queryLength, | ||
| ReferenceType | reference, | ||
| ReferenceIndex | searchStartIndex, | ||
| int | searchSize, | ||
| CigarRoller & | cigarRoller, | ||
| ReferenceIndex & | matchPosition | ||
| ) | [inline] |
Core local alignment algorithm.
| query | input query |
| queryLength | length of query |
| reference | reference genome |
| searchStartIndex | matching starts here |
| searchSize | how far we will search |
| cigarRoller | store alignment results here |
| matchPosition | store match position |
Definition at line 159 of file GreedyTupleAligner.h.
References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.
{
// Algorithm:
// finished align? (should we try different align position?)
// if not, try next tuple
// is the tuple aligned?
// yes, extend to previous, mark unmatched part mismatch or gap
// extend to next matched part
int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used
int queryMatchCount = 0; // query matched # of bases
int q1 = 0; // to align
int pos = -1;
int lastR1 = -1; // index: record last
cigarRoller.clear();
matchPosition = -1;
while (queryMatchCount < queryLength)
{
if (r1 == searchSize - 1) // touched ref right boundary
{
cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
break;
}
if (queryLength - q1 < tupleSize)
{
// XXX this needs to do something more sane
// printf("some bases left!\n");
// a simple fix: treat all left-over bases as mismatches/matches
cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
break;
}
int mismatch = 0;
int matchedLen = 0;
if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple
>= 0)
{
// found match position for tuple
if (lastR1<0)
matchPosition = pos;
//
// deal with left
//
if (lastR1>=0) // have previously aligned part of the query to the reference genome yet
{
if (pos > 0)
{
cigarRoller.Add(CigarRoller::del, pos);
}
}
else
{
lastR1 = pos;
}
r1 += pos;
r1 += matchedLen;
q1 += matchedLen;
//
// deal with right
//
cigarRoller.Add(CigarRoller::match, matchedLen);
queryMatchCount = q1;
lastR1 = r1;
continue;
} // end if
//
// try insertion
// maximum insert ? say 2
//
for (int i = 1; i < queryLength - q1 - tupleSize; i++)
{
int mismatch = 0;
int matchedLen = 0;
// check reference genome broundary
if (searchStartIndex + r1 >= reference.getNumberBases())
return;
if ((pos = MatchTuple(query+q1 + i ,
queryLength - q1 -i ,
reference,
searchStartIndex + r1,
searchSize - r1,
matchedLen,
mismatch)) // found match position for tuple
>= 0)
{
if (matchPosition < 0)
matchPosition = pos + q1 + i ;
}
queryMatchCount += i;
q1 += i;
cigarRoller.Add(CigarRoller::insert, i);
lastR1 = r1 + pos;
r1 += pos + tupleSize;
q1 += tupleSize;
// deal with right
while (searchStartIndex + r1 < reference.getNumberBases()
&& query[q1]==reference[searchStartIndex + r1]
&& q1 < queryLength)
{
r1++;
q1++;
}
if (q1 < queryLength)
{
cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
queryMatchCount = q1;
}
else
{
cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
queryMatchCount = queryLength ;
break ;
}
}
r1++;
q1++;
// try next
} // end while (queryMatchCount < queryLength)
}
| int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple | ( | const QueryType | query, |
| const int | queryLength, | ||
| const ReferenceType | reference, | ||
| const ReferenceIndex | searchStartIndex, | ||
| const int | searchSize, | ||
| int & | matchedLength, | ||
| int & | mismatch | ||
| ) | [inline] |
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
| query | input query |
| queryLength | length of query |
| reference | reference sequence |
| searchStartIndex | the positino where search starts |
| searchSize | the total length in reference sequence that will be examine |
| matchedLength | store how many bases are matched |
| mismatch | store how many bases are mismatched |
Definition at line 79 of file GreedyTupleAligner.h.
Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().
{
// now use naive search,
// TODO: will incorportate KMP serach later
// TODO: adjust tolerance of mismatches
const int MAX_MISMATCH=2;
int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
#if defined(DEBUG_GREEDY_ALIGNER)
cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
#endif
// here i is the matching position (inclusive)
// j is the matched length
for (int i = 0; i <= searchSize - tupleSize; i++)
{
int j = 0;
mismatch = 0;
while (j < queryLength)
{
if (searchStartIndex + i + j >= reference.getNumberBases())
break;
if (query[j] != reference[searchStartIndex + i + j])
{
mismatch++;
if (mismatch >= MAX_MISMATCH)
break;
}
j++;
}
if (j>0 && (j==queryLength)) j--;
while (searchStartIndex +i +j < reference.getNumberBases()
&& ((j+1) > mismatch)
&& mismatch>0
&& query[j] != reference[searchStartIndex + i+j])
{
// if pattern matching goes beyong the preset mismatch cutoff,
// we will have to go backwards
j--;
mismatch--;
}
int score = j - mismatch;
if (score > bestScore)
{
bestPos = i;
bestScore = score;
bestMismatch = mismatch;
bestMatchedLength = j+1;
}
}
if (bestScore > 0)
{
mismatch = bestMismatch;
matchedLength = bestMatchedLength;
return bestPos;
}
return -1;
}