00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_FILTER_H__ 00019 #define __SAM_FILTER_H__ 00020 00021 #include "SamRecord.h" 00022 #include "GenomeSequence.h" 00023 00024 class SamFilter 00025 { 00026 public: 00027 enum FilterStatus { 00028 NONE, // The filter did not affect the read. 00029 CLIPPED, // Filtering clipped the read. 00030 FILTERED // Filtering caused the read to be modified to unmapped. 00031 }; 00032 00033 // Clip the read based on the specified mismatch threshold. 00034 // Returns how the read was affected, 00035 // NONE if the read was not modified, 00036 // CLIPPED if the read was clipped, 00037 // FILTERED if the whole read would have been clipped so instead the 00038 // read was modified to unmapped. 00039 static FilterStatus clipOnMismatchThreshold(SamRecord& record, 00040 GenomeSequence& refSequence, 00041 double mismatchThreshold); 00042 00043 /// Soft clip the record from the front and/or the back. 00044 /// \param record record to be clipped (input/output parameter). 00045 /// \param numFrontClips number of bases that should be clipped from the 00046 /// front of the sequence read. (total count, including any that are 00047 /// already clipped.) 00048 /// \param backClipPos number of bases that should be clipped from the 00049 /// back of the sequence read. (total count, including any that are 00050 /// already clipped.) 00051 static FilterStatus softClip(SamRecord& record, 00052 int32_t numFrontClips, 00053 int32_t numBackClips); 00054 00055 /// Soft clip the cigar from the front and/or the back, writing the value 00056 /// into the new cigar, updatedCigar & startPos are only updated if 00057 /// the return FilterStatus is CLIPPED. 00058 /// \param oldCigar cigar prior to clipping 00059 /// \param numFrontClips number of bases that should be clipped from the 00060 /// front of the sequence read. (total count, including any that are 00061 /// already clipped.) 00062 /// \param numBackClips number of bases that should be clipped from the 00063 /// back of the sequence read. (total count, including any that are 00064 /// already clipped.) 00065 /// \param startPos 0-based start position associated with the 00066 /// cigar prior to updating (input) and set to the 0-based start position 00067 /// after updating (output) the cigar if it was CLIPPED. 00068 /// \param updatedCigar set to the clipped cigar if CLIPPED (output param). 00069 static FilterStatus softClip(Cigar& oldCigar, 00070 int32_t numFrontClips, 00071 int32_t numBackClips, 00072 int32_t& startPos, 00073 CigarRoller& updatedCigar); 00074 00075 // Filter the read based on the specified quality threshold. 00076 // Returns how the read was affected, 00077 // NONE if the read was not modified, 00078 // FILTERED if the read was modified to unmapped because it was over 00079 // the quality threshold. 00080 static FilterStatus filterOnMismatchQuality(SamRecord& record, 00081 GenomeSequence& refSequence, 00082 uint32_t qualityThreshold, 00083 uint8_t defaultQualityInt); 00084 00085 static uint32_t sumMismatchQuality(SamRecord& record, 00086 GenomeSequence& refSequence, 00087 uint8_t defaultQualityInt); 00088 00089 // Filter the read out (mark it as unmapped. 00090 static void filterRead(SamRecord& record); 00091 }; 00092 00093 #endif 00094