libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_FILTER_H__ 00019 #define __SAM_FILTER_H__ 00020 00021 #include "SamRecord.h" 00022 #include "GenomeSequence.h" 00023 00024 /// Class for helping to filter a SAM/BAM record. 00025 class SamFilter 00026 { 00027 public: 00028 /// Enum describing what sort of filtering was done. 00029 enum FilterStatus { 00030 NONE, ///< The filter did not affect the read. 00031 CLIPPED, ///< Filtering clipped the read. 00032 FILTERED ///< Filtering caused the read to be modified to unmapped. 00033 }; 00034 00035 /// Clip the read based on the specified mismatch threshold. 00036 /// \return how the read was affected, 00037 /// NONE if the read was not modified, 00038 /// CLIPPED if the read was clipped, 00039 /// FILTERED if the whole read would have been clipped so instead the 00040 /// read was modified to unmapped. 00041 static FilterStatus clipOnMismatchThreshold(SamRecord& record, 00042 GenomeSequence& refSequence, 00043 double mismatchThreshold); 00044 00045 /// Soft clip the record from the front and/or the back. 00046 /// \param record record to be clipped (input/output parameter). 00047 /// \param numFrontClips number of bases that should be clipped from the 00048 /// front of the sequence read. (total count, including any that are 00049 /// already clipped.) 00050 /// \param backClipPos number of bases that should be clipped from the 00051 /// back of the sequence read. (total count, including any that are 00052 /// already clipped.) 00053 static FilterStatus softClip(SamRecord& record, 00054 int32_t numFrontClips, 00055 int32_t numBackClips); 00056 00057 /// Soft clip the cigar from the front and/or the back, writing the value 00058 /// into the new cigar, updatedCigar & startPos are only updated if 00059 /// the return FilterStatus is CLIPPED. 00060 /// \param oldCigar cigar prior to clipping 00061 /// \param numFrontClips number of bases that should be clipped from the 00062 /// front of the sequence read. (total count, including any that are 00063 /// already clipped.) 00064 /// \param numBackClips number of bases that should be clipped from the 00065 /// back of the sequence read. (total count, including any that are 00066 /// already clipped.) 00067 /// \param startPos 0-based start position associated with the 00068 /// cigar prior to updating (input) and set to the 0-based start position 00069 /// after updating (output) the cigar if it was CLIPPED. 00070 /// \param updatedCigar set to the clipped cigar if CLIPPED (output param). 00071 static FilterStatus softClip(Cigar& oldCigar, 00072 int32_t numFrontClips, 00073 int32_t numBackClips, 00074 int32_t& startPos, 00075 CigarRoller& updatedCigar); 00076 00077 /// Filter the read based on the specified quality threshold. 00078 /// \return how the read was affected, 00079 /// NONE if the read was not modified, 00080 /// FILTERED if the read was modified to unmapped because it was over 00081 /// the quality threshold. 00082 static FilterStatus filterOnMismatchQuality(SamRecord& record, 00083 GenomeSequence& refSequence, 00084 uint32_t qualityThreshold, 00085 uint8_t defaultQualityInt); 00086 00087 /// Get the sum of the qualities of all mismatches in the record. 00088 /// \param record record on which to calculate the sum the mismatch qualities 00089 /// \param refSequence reference to use to check for mismatches. 00090 /// \param defaultQualityInt default value to use for the quality if no 00091 /// quality was specified in the read. 00092 /// \return sum of the qualities of mismatches 00093 static uint32_t sumMismatchQuality(SamRecord& record, 00094 GenomeSequence& refSequence, 00095 uint8_t defaultQualityInt); 00096 00097 /// Filter the read by marking it as unmapped. 00098 static void filterRead(SamRecord& record); 00099 }; 00100 00101 #endif 00102