SamQuerySeqWithRefHelper.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
00019 #define __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
00020 
00021 #include <stdint.h>
00022 
00023 #include "SamRecord.h"
00024 #include "GenomeSequence.h"
00025 
00026 /// This class contains the match/mismatch information
00027 /// between the reference and a read for a single base.
00028 class SamSingleBaseMatchInfo
00029 {
00030 public:
00031     /// Classification of a single base.  More types can be added later as needed.
00032     enum Type {UNKNOWN, MATCH, MISMATCH};
00033 
00034     SamSingleBaseMatchInfo();   ///< Defined outside this header; the initial member values are not visible here.
00035     ~SamSingleBaseMatchInfo();
00036 
00037 
00038     /// Get the type (match/mismatch/unknown) for this object.  NOTE(review): not declared const.
00039     Type getType();
00040 
00041     /// Get the query index for this object.  NOTE(review): not declared const.
00042     int32_t getQueryIndex();
00043 
00044     /// Set the type (match/mismatch/unknown) for this object.
00045     void setType(Type newType);
00046 
00047     /// Set the query index for this object.
00048     void setQueryIndex(int32_t queryIndex);
00049 
00050 private:
00051     Type myType;            ///< Type value; presumably what getType()/setType() access (definitions not in this header).
00052     int32_t myQueryIndex;   ///< Query index value; presumably what getQueryIndex()/setQueryIndex() access.
00053 };
00054 
00055 /// Iterates through the query and compares it with the reference.
00056 /// NOTE: References to the GenomeSequence and SamRecord are stored, the objects
00057 /// are not copied, so they must remain valid as long as this class is used.
00058 class SamQuerySeqWithRefIter
00059 {
00060 public:
00061     SamQuerySeqWithRefIter(SamRecord& record, GenomeSequence& refSequence,
00062                            bool forward = true);
00063     virtual ~SamQuerySeqWithRefIter();
00064     
00065     /// Reset to start at the beginning of the record.
00066     /// This will re-read values from SamRecord, so can be used if it has
00067     /// changed to contain information for a new record.
00068     /// \param forward true means to start from the beginning and go to the end;
00069     /// false means to start from the end and go to the beginning.
00070     /// \return true if successfully reset; false if failed to read the Cigar.
00071     bool reset(bool forward = true);
00072     
00073     /// Returns information for the next position where the query and the 
00074     /// reference match or mismatch.  To be a match or mismatch, both the query
00075     /// and reference must have a base that is not 'N'.
00076     /// This means:
00077     ///    insertions and deletions are not mismatches or matches.
00078     ///    'N' bases are not matches or mismatches
00079     /// \param matchMismatchInfo return parameter with the information about
00080     /// the matching/mismatching base.
00081     /// \return true if there was another match/mismatch
00082     /// (matchMismatchInfo was set); false if not.
00083     bool getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo);
00084     
00085 private:
00086 
00087     SamQuerySeqWithRefIter();   // Private default constructor; the class holds reference members, so presumably this only blocks default construction.
00088     
00089     void nextIndex();           // Presumably steps myQueryIndex in the direction given by myForward - TODO confirm in the implementation file.
00090 
00091     SamRecord& myRecord;                ///< Reference to the caller's record (not copied).
00092     GenomeSequence& myRefSequence;      ///< Reference to the caller's reference sequence (not copied).
00093     Cigar* myCigar;                     ///< Cigar pointer; presumably obtained from myRecord in reset() - TODO confirm ownership.
00094     uint32_t myStartOfReadOnRefIndex;   ///< Presumably the index in myRefSequence where the read starts - TODO confirm.
00095     int32_t myQueryIndex;               ///< Presumably the current position within the query - TODO confirm.
00096     bool myForward;                     ///< Presumably the iteration direction (see the forward parameter of reset()).
00097 };
00098 
00099 
00100 /// Contains methods for converting between the query sequence and reference.
00101 class SamQuerySeqWithRef
00102 {
00103 public:
00104     /// Gets the sequence with '=' in any position where the sequence matches
00105     /// the reference.  
00106     /// NOTE: 'N' in both the sequence and the reference is not considered a
00107     /// match.
00108     /// \param currentSeq sequence that should be converted
00109     /// \param seq0BasedPos 0 based start position of currentSeq on the reference.
00110     /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference)
00111     /// \param referenceName reference name associated with this sequence
00112     /// \param refSequence reference sequence object
00113     /// \param updatedSeq return parameter that this method sets to the
00114     ///  current sequence, replacing any matches to the reference with '='.
00115     static void seqWithEquals(const char* currentSeq,
00116                               int32_t seq0BasedPos,
00117                               Cigar& cigar, 
00118                               const char* referenceName,
00119                               const GenomeSequence& refSequence,
00120                               std::string& updatedSeq);   // NOTE(review): <string> is not included directly by this header; presumably provided transitively - confirm.
00121 
00122     /// Gets the sequence converting '=' to the appropriate base using the
00123     /// reference.
00124     /// \param currentSeq sequence that should be converted
00125     /// \param seq0BasedPos 0 based start position of currentSeq on the reference.
00126     /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference)
00127     /// \param referenceName reference name associated with this sequence
00128     /// \param refSequence reference sequence object
00129     /// \param updatedSeq return parameter that this method sets to the
00130     ///  current sequence, replacing any '=' with the base from the reference.
00131     static void seqWithoutEquals(const char* currentSeq,
00132                                  int32_t seq0BasedPos,
00133                                  Cigar& cigar, 
00134                                  const char* referenceName,
00135                                  const GenomeSequence& refSequence,
00136                                  std::string& updatedSeq);
00137 
00138 private:
00139     SamQuerySeqWithRef();   // Private constructor; both public methods are static, so presumably the class is not meant to be instantiated.
00140 };
00141 #endif
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3