00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_QUERY_SEQ_WITH_REF_HELPER_H__ 00019 #define __SAM_QUERY_SEQ_WITH_REF_HELPER_H__ 00020 00021 #include <stdint.h> 00022 00023 #include "SamRecord.h" 00024 #include "GenomeSequence.h" 00025 00026 /// This class contains the match/mismatch information 00027 /// between the reference and a read for a single base. 00028 class SamSingleBaseMatchInfo 00029 { 00030 public: 00031 /// More types can be added later as needed. 00032 enum Type {UNKNOWN, MATCH, MISMATCH}; 00033 00034 SamSingleBaseMatchInfo(); 00035 ~SamSingleBaseMatchInfo(); 00036 00037 00038 /// Get the type (match/mismatch/unknown) for this object. 00039 Type getType(); 00040 00041 /// Get the query index for this object. 00042 int32_t getQueryIndex(); 00043 00044 /// Set the type (match/mismatch/unkown) for this object. 00045 void setType(Type newType); 00046 00047 /// Set the query index for this object. 00048 void setQueryIndex(int32_t queryIndex); 00049 00050 private: 00051 Type myType; 00052 int32_t myQueryIndex; 00053 }; 00054 00055 /// Iterates through the query and compare with reference. 00056 /// NOTE: References to the GenomeSequence and SamRecord are stored, the objects 00057 /// are not copied, so they must remain valid as long as this class is used. 00058 class SamQuerySeqWithRefIter 00059 { 00060 public: 00061 SamQuerySeqWithRefIter(SamRecord& record, GenomeSequence& refSequence, 00062 bool forward = true); 00063 virtual ~SamQuerySeqWithRefIter(); 00064 00065 /// Reset to start at the beginning of the record. 00066 /// This will re-read values from SamRecord, so can be used if it has 00067 /// changed to contain information for a new record. 00068 /// \param forward true means to start from the beginning and go to the end; 00069 /// false means to start from the end and go to the beginning. 00070 /// \return true if successfully reset; false if failed to read the Cigar. 00071 bool reset(bool forward = true); 00072 00073 /// Returns information for the next position where the query and the 00074 /// reference match or mismatch. To be a match or mismatch, both the query 00075 /// and reference must have a base that is not 'N'. 00076 /// This means: 00077 /// insertions and deletions are not mismatches or matches. 00078 /// 'N' bases are not matches or mismatches 00079 /// \param matchMismatchInfo return parameter with the information about 00080 /// the matching/mismatching base. 00081 /// \return true if there was another match/mismatch 00082 /// (matchMismatchInfo was set); false if not. 00083 bool getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo); 00084 00085 private: 00086 00087 SamQuerySeqWithRefIter(); 00088 00089 void nextIndex(); 00090 00091 SamRecord& myRecord; 00092 GenomeSequence& myRefSequence; 00093 Cigar* myCigar; 00094 uint32_t myStartOfReadOnRefIndex; 00095 int32_t myQueryIndex; 00096 bool myForward; 00097 }; 00098 00099 00100 /// Contains methods for converting between the query sequence and reference. 00101 class SamQuerySeqWithRef 00102 { 00103 public: 00104 /// Gets the sequence with '=' in any position where the sequence matches 00105 /// the reference. 00106 /// NOTE: 'N' in both the sequence and the reference is not considered a 00107 /// match. 00108 /// \param currentSeq sequence that should be converted 00109 /// \param seq0BasedPos 0 based start position of currentSeq on the reference. 00110 /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference) 00111 /// \param referenceName reference name associated with this sequence 00112 /// \param refSequence reference sequence object 00113 /// \param updatedSeq return parameter that this method sets to the 00114 /// current sequence, replacing any matches to the reference with '='. 00115 static void seqWithEquals(const char* currentSeq, 00116 int32_t seq0BasedPos, 00117 Cigar& cigar, 00118 const char* referenceName, 00119 const GenomeSequence& refSequence, 00120 std::string& updatedSeq); 00121 00122 /// Gets the sequence converting '=' to the appropriate base using the 00123 /// reference. 00124 /// \param currentSeq sequence that should be converted 00125 /// \param seq0BasedPos 0 based start position of currentSeq on the reference. 00126 /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference) 00127 /// \param referenceName reference name associated with this sequence 00128 /// \param refSequence reference sequence object 00129 /// \param updatedSeq return parameter that this method sets to the 00130 /// current sequence, replacing any '=' with the base from the reference. 00131 static void seqWithoutEquals(const char* currentSeq, 00132 int32_t seq0BasedPos, 00133 Cigar& cigar, 00134 const char* referenceName, 00135 const GenomeSequence& refSequence, 00136 std::string& updatedSeq); 00137 00138 private: 00139 SamQuerySeqWithRef(); 00140 }; 00141 #endif