SamQuerySeqWithRefHelper.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdexcept>
00019 
00020 #include "SamQuerySeqWithRefHelper.h"
00021 #include "BaseUtilities.h"
00022 #include "SamFlag.h"
00023 
00024 SamQuerySeqWithRefIter::SamQuerySeqWithRefIter(SamRecord& record,
00025                                                GenomeSequence& refSequence,
00026                                                bool forward)
00027     : myRecord(record),
00028       myRefSequence(refSequence),
00029       myCigar(NULL),
00030       myStartOfReadOnRefIndex(0),
00031       myQueryIndex(0),
00032       myForward(forward)
00033 {
00034     myCigar = myRecord.getCigarInfo();   
00035     myStartOfReadOnRefIndex = 
00036         myRefSequence.getGenomePosition(myRecord.getReferenceName()) +
00037         myRecord.get0BasedPosition();
00038 
00039     if(!forward)
00040     {
00041         myQueryIndex = myRecord.getReadLength() - 1;
00042     }
00043 }
00044 
00045 
00046 SamQuerySeqWithRefIter::~SamQuerySeqWithRefIter()
00047 {
00048 }
00049 
00050 
00051 
00052 bool SamQuerySeqWithRefIter::reset(bool forward)
00053 {
00054     myCigar = myRecord.getCigarInfo();
00055     if(myCigar == NULL)
00056     {
00057         // Failed to get Cigar.
00058         return(false);
00059     }
00060 
00061     // Get where the position of where this read starts as mapped to the 
00062     // reference.
00063     myStartOfReadOnRefIndex = 
00064         myRefSequence.getGenomePosition(myRecord.getReferenceName()) +
00065         myRecord.get0BasedPosition();
00066 
00067     forward = myForward;
00068 
00069     if(myForward)
00070     {
00071         myQueryIndex = 0;
00072     }
00073     else
00074     {
00075         // reverse, so start at the last entry.
00076         myQueryIndex = myRecord.getReadLength() - 1;
00077     }
00078     return(true);
00079 }
00080 
00081 
00082 // Returns information for the next position where the query and the 
00083 // reference match or mismatch.  To be a match or mismatch, both the query
00084 // and reference must have a base that is not 'N'.
00085 // This means:
00086 //    insertions and deletions are not mismatches or matches.
00087 //    'N' bases are not matches or mismatches
00088 // NOTE: Only returns positions where both the query and the reference
00089 // Returns true if an entry was found, false if there are no more matches or
00090 // mismatches.
00091 bool SamQuerySeqWithRefIter::getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo)
00092 {
00093     // Check whether or not this read is mapped. 
00094     // If the read is not mapped, return no matches.
00095     if(!SamFlag::isMapped(myRecord.getFlag()))
00096     {
00097         // Not mapped.
00098         return(false);
00099     }
00100 
00101     // Check that the Cigar is set.
00102     if(myCigar == NULL)
00103     {
00104         // Error.
00105         throw(std::runtime_error("Cannot determine matches/mismatches since failed to retrieve the cigar"));
00106         return(false);
00107     }
00108 
00109     // Repull the read length from the record to check just in case the
00110     // record has changed length.
00111     // Loop until a match or mismatch is found as long as query index
00112     // is still within the read  (Loop is broken by a return.
00113     while((myQueryIndex < myRecord.getReadLength()) &&
00114           (myQueryIndex >= 0))
00115     {
00116         // Still more bases, look for a match/mismatch.
00117 
00118         // Get the reference offset for this read position.
00119         int32_t refOffset = myCigar->getRefOffset(myQueryIndex);
00120         if(refOffset == Cigar::INDEX_NA)
00121         {
00122             // This is either a softclip or an insertion
00123             // which do not count as a match or a mismatch, so
00124             // go to the next index.
00125             nextIndex();
00126             continue;
00127         }
00128         
00129         // Both the reference and the read have a base, so get the bases.
00130         char readBase = myRecord.getSequence(myQueryIndex);
00131         char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset];
00132        
00133         // If either the read or the reference bases are unknown, then
00134         // it does not count as a match or a mismatch.
00135         if(BaseUtilities::isAmbiguous(readBase) || 
00136            BaseUtilities::isAmbiguous(refBase))
00137         {
00138             // Either the reference base or the read base are unknown,
00139             // so skip this position.
00140             nextIndex();
00141             continue;
00142         }
00143 
00144         // Both the read & the reference have a known base, so it is either
00145         // a match or a mismatch.
00146         matchMismatchInfo.setQueryIndex(myQueryIndex);
00147 
00148         // Check if they are equal.
00149         if(BaseUtilities::areEqual(readBase, refBase))
00150         {
00151             // Match.
00152             matchMismatchInfo.setType(SamSingleBaseMatchInfo::MATCH);
00153             // Increment the query index to the next position.
00154             nextIndex();
00155             return(true);
00156         }
00157         else
00158         {
00159             // Mismatch
00160             matchMismatchInfo.setType(SamSingleBaseMatchInfo::MISMATCH);
00161             // Increment the query index to the next position.
00162             nextIndex();
00163             return(true);
00164         }
00165     }
00166 
00167     // No matches or mismatches were found, so return false.
00168     return(false);
00169 }
00170 
00171 
00172 void SamQuerySeqWithRefIter::nextIndex()
00173 {
00174     if(myForward)
00175     {
00176         ++myQueryIndex;
00177     }
00178     else
00179     {
00180         --myQueryIndex;
00181     }
00182 }
00183 
00184 
00185 SamSingleBaseMatchInfo::SamSingleBaseMatchInfo()
00186     : myType(UNKNOWN),
00187       myQueryIndex(0)
00188 {
00189 }
00190 
00191 
00192 SamSingleBaseMatchInfo::~SamSingleBaseMatchInfo()
00193 {
00194 }
00195 
00196 
00197 SamSingleBaseMatchInfo::Type SamSingleBaseMatchInfo::getType()
00198 {
00199     return(myType);
00200 }
00201 
00202 
00203 int32_t SamSingleBaseMatchInfo::getQueryIndex()
00204 {
00205     return(myQueryIndex);
00206 }
00207 
00208 
00209 void SamSingleBaseMatchInfo::setType(Type newType)
00210 {
00211     myType = newType;
00212 }
00213 
00214 
00215 void SamSingleBaseMatchInfo::setQueryIndex(int32_t queryIndex)
00216 {
00217     myQueryIndex = queryIndex;
00218 }
Generated on Wed Nov 17 15:38:27 2010 for StatGen Software by  doxygen 1.6.3