libStatGen Software  1
GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex > Class Template Reference

List of all members.

Public Member Functions

 GreedyTupleAligner (Weight &wt)
int MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
 Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
void Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition)
 Core local alignment algorithm.

Detailed Description

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
class GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >

Definition at line 60 of file GreedyTupleAligner.h.


Member Function Documentation

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align ( QueryType  query,
int  queryLength,
ReferenceType  reference,
ReferenceIndex  searchStartIndex,
int  searchSize,
CigarRoller cigarRoller,
ReferenceIndex &  matchPosition 
) [inline]

Core local alignment algorithm.

Parameters:
queryinput query
queryLengthlength of query
referencereference genome
searchStartIndexmatching starts here
searchSizehow far we will search
cigarRollerstore alignment results here
matchPositionstore match position

Definition at line 159 of file GreedyTupleAligner.h.

References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.

    {
        // Algorithm:
        // finished align? (should we try different align position?)
        // if not, try next tuple
        //    is the tuple aligned?
        //    yes, extend to previous, mark unmatched part mismatch or gap
        //         extend to next matched part
        int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used
        int queryMatchCount = 0; // query matched # of bases
        int q1 = 0; // to align
        int pos = -1;
        int lastR1 = -1; // index: record last

        cigarRoller.clear();
        matchPosition = -1;

        while (queryMatchCount < queryLength)
        {
            if (r1 == searchSize - 1)   // touched ref right boundary
            {
                cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
                break;
            }
            if (queryLength - q1 < tupleSize)
            {
                // XXX this needs to do something more sane
                // printf("some bases left!\n");
                // a simple fix: treat all left-over bases as mismatches/matches
                cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
                break;
            }
            int mismatch = 0;
            int matchedLen = 0;
            if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple

                    >= 0)
            {
                // found match position for tuple

                if (lastR1<0)
                    matchPosition = pos;

                //
                // deal with left
                //
                if (lastR1>=0)   // have previously aligned part of the query to the reference genome yet
                {
                    if (pos > 0)
                    {
                        cigarRoller.Add(CigarRoller::del, pos);
                    }
                }
                else
                {
                    lastR1 = pos;
                }

                r1 += pos;
                r1 += matchedLen;
                q1 += matchedLen;

                //
                // deal with right
                //
                cigarRoller.Add(CigarRoller::match, matchedLen);
                queryMatchCount = q1;
                lastR1 = r1;

                continue;
            } // end if

            //
            // try insertion
            // maximum insert ? say 2
            //
            for (int i = 1; i < queryLength - q1 - tupleSize; i++)
            {
                int mismatch = 0;
                int matchedLen = 0;
                // check reference genome broundary
                if (searchStartIndex + r1 >= reference.getNumberBases())
                    return;
                if ((pos = MatchTuple(query+q1 + i ,
                                      queryLength - q1 -i ,
                                      reference,
                                      searchStartIndex + r1,
                                      searchSize - r1,
                                      matchedLen,
                                      mismatch)) // found match position for tuple
                        >= 0)
                {
                    if (matchPosition < 0)
                        matchPosition = pos + q1 + i ;
                }
                queryMatchCount += i;
                q1 += i;
                cigarRoller.Add(CigarRoller::insert, i);

                lastR1 = r1 + pos;
                r1 += pos + tupleSize;
                q1 += tupleSize;

                // deal with right
                while (searchStartIndex + r1 < reference.getNumberBases()
                        && query[q1]==reference[searchStartIndex + r1]
                        && q1 < queryLength)
                {
                    r1++;
                    q1++;
                }
                if (q1 < queryLength)
                {
                    cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
                    queryMatchCount = q1;
                }
                else
                {
                    cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
                    queryMatchCount = queryLength ;
                    break ;
                }
            }

            r1++;
            q1++;

            // try next
        } // end while (queryMatchCount < queryLength)
    }
template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple ( const QueryType  query,
const int  queryLength,
const ReferenceType  reference,
const ReferenceIndex  searchStartIndex,
const int  searchSize,
int &  matchedLength,
int &  mismatch 
) [inline]

Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.

Parameters:
queryinput query
queryLengthlength of query
referencereference sequence
searchStartIndexthe positino where search starts
searchSizethe total length in reference sequence that will be examine
matchedLengthstore how many bases are matched
mismatchstore how many bases are mismatched
Returns:
-1 for unsuccess return

Definition at line 79 of file GreedyTupleAligner.h.

Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().

    {
        // now use naive search,
        // TODO: will incorportate KMP serach later
        // TODO: adjust tolerance of mismatches
        const int MAX_MISMATCH=2;
        int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;

#if defined(DEBUG_GREEDY_ALIGNER)
        cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
#endif
        // here i is the matching position (inclusive)
        // j is the matched length
        for (int i = 0; i <= searchSize - tupleSize; i++)
        {
            int j = 0;
            mismatch = 0;
            while (j < queryLength)
            {
                if (searchStartIndex + i + j >= reference.getNumberBases())
                    break;
                if (query[j] != reference[searchStartIndex + i + j])
                {
                    mismatch++;
                    if (mismatch >= MAX_MISMATCH)
                        break;
                }
                j++;
            }

            if (j>0 && (j==queryLength)) j--;

            while (searchStartIndex +i +j < reference.getNumberBases()
                    && ((j+1) > mismatch)
                    && mismatch>0
                    && query[j] != reference[searchStartIndex + i+j])
            {
                // if pattern matching goes beyong the preset mismatch cutoff,
                // we will have to go backwards
                j--;
                mismatch--;
            }

            int score = j - mismatch;

            if (score > bestScore)
            {
                bestPos = i;
                bestScore = score;
                bestMismatch = mismatch;
                bestMatchedLength = j+1;
            }
        }

        if (bestScore > 0)
        {
            mismatch = bestMismatch;
            matchedLength = bestMatchedLength;
            return bestPos;
        }
        return -1;
    }

The documentation for this class was generated from the following file:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends