libStatGen Software  1
SamFilter.h
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_FILTER_H__
00019 #define __SAM_FILTER_H__
00020 
00021 #include "SamRecord.h"
00022 #include "GenomeSequence.h"
00023 
00024 /// Class for helping to filter a SAM/BAM record.
00025 class SamFilter
00026 {
00027 public:
00028     /// Enum describing what sort of filtering was done.
00029     enum FilterStatus {
00030         NONE,    ///< The filter did not affect the read.
00031         CLIPPED, ///< Filtering clipped the read.
00032         FILTERED ///< Filtering caused the read to be modified to unmapped.
00033     };
00034 
00035     /// Clip the read based on the specified mismatch threshold.
00036     /// \return how the read was affected, 
00037     ///     NONE if the read was not modified,
00038     ///     CLIPPED if the read was clipped,
00039     ///     FILTERED if the whole read would have been clipped so instead the
00040     ///              read was modified to unmapped.
00041     static FilterStatus clipOnMismatchThreshold(SamRecord& record, 
00042                                                 GenomeSequence& refSequence,
00043                                                 double mismatchThreshold);
00044 
00045     /// Soft clip the record from the front and/or the back.
00046     /// \param record record to be clipped (input/output parameter).
00047     /// \param numFrontClips number of bases that should be clipped from the
00048     /// front of the sequence read.  (total count, including any that are
00049     /// already clipped.)
00050     /// \param backClipPos number of bases that should be clipped from the
00051     /// back of the sequence read.  (total count, including any that are
00052     /// already clipped.)
00053     static FilterStatus softClip(SamRecord& record,
00054                                  int32_t numFrontClips, 
00055                                  int32_t numBackClips);
00056 
00057     /// Soft clip the cigar from the front and/or the back, writing the value
00058     /// into the new cigar, updatedCigar & startPos are only updated if
00059     /// the return FilterStatus is CLIPPED.
00060     /// \param oldCigar cigar prior to clipping
00061     /// \param numFrontClips number of bases that should be clipped from the
00062     /// front of the sequence read.  (total count, including any that are
00063     /// already clipped.)
00064     /// \param numBackClips number of bases that should be clipped from the
00065     /// back of the sequence read.  (total count, including any that are
00066     /// already clipped.)
00067     /// \param startPos 0-based start position associated with the
00068     /// cigar prior to updating (input) and set to the 0-based start position
00069     /// after updating (output) the cigar if it was CLIPPED.
00070     /// \param updatedCigar set to the clipped cigar if CLIPPED (output param).
00071     static FilterStatus softClip(Cigar& oldCigar, 
00072                                  int32_t numFrontClips,
00073                                  int32_t numBackClips,
00074                                  int32_t& startPos,
00075                                  CigarRoller& updatedCigar);
00076 
00077     /// Filter the read based on the specified quality threshold.
00078     /// \return how the read was affected, 
00079     ///     NONE if the read was not modified,
00080     ///     FILTERED if the read was modified to unmapped because it was over
00081     ///              the quality threshold.
00082     static FilterStatus filterOnMismatchQuality(SamRecord& record,
00083                                                 GenomeSequence& refSequence,
00084                                                 uint32_t qualityThreshold, 
00085                                                 uint8_t defaultQualityInt);
00086     
00087     /// Get the sum of the qualities of all mismatches in the record.
00088     /// \param record record on which to calculate the sum the mismatch qualities
00089     /// \param refSequence reference to use to check for mismatches.
00090     /// \param defaultQualityInt default value to use for the quality if no
00091     ///        quality was specified in the read.
00092     /// \return sum of the qualities of mismatches
00093     static uint32_t sumMismatchQuality(SamRecord& record, 
00094                                        GenomeSequence& refSequence,
00095                                        uint8_t defaultQualityInt);
00096 
00097     /// Filter the read by marking it as unmapped.
00098     static void filterRead(SamRecord& record);
00099 };
00100 
00101 #endif
00102 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends