SamFilter.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_FILTER_H__
00019 #define __SAM_FILTER_H__
00020 
00021 #include "SamRecord.h"
00022 #include "GenomeSequence.h"
00023 
00024 class SamFilter // Static helpers that clip or filter (mark unmapped) SamRecord reads.
00025 {
00026 public:
00027     enum FilterStatus {
00028         NONE, // The filter did not affect the read.
00029         CLIPPED, // Filtering clipped the read.
00030         FILTERED // Filtering caused the read to be modified to unmapped.
00031     };
00032 
00033     // Clip the read based on the specified mismatch threshold.
00034     // Returns how the read was affected:
00035     //     NONE if the read was not modified,
00036     //     CLIPPED if the read was clipped,
00037     //     FILTERED if the whole read would have been clipped so instead the
00038     //              read was modified to unmapped.
00039     static FilterStatus clipOnMismatchThreshold(SamRecord& record, 
00040                                                 GenomeSequence& refSequence,
00041                                                 double mismatchThreshold);
00042 
00043     /// Soft clip the record from the front and/or the back.
00044     /// \param record record to be clipped (input/output parameter).
00045     /// \param numFrontClips number of bases that should be clipped from the
00046     /// front of the sequence read.  (total count, including any that are
00047     /// already clipped.)
00048     /// \param numBackClips number of bases that should be clipped from the
00049     /// back of the sequence read.  (total count, including any that are
00050     /// already clipped.)
00051     static FilterStatus softClip(SamRecord& record,
00052                                  int32_t numFrontClips, 
00053                                  int32_t numBackClips);
00054 
00055     /// Soft clip the cigar from the front and/or the back, writing the value
00056     /// into the new cigar.  updatedCigar & startPos are only updated if
00057     /// the returned FilterStatus is CLIPPED.
00058     /// \param oldCigar cigar prior to clipping
00059     /// \param numFrontClips number of bases that should be clipped from the
00060     /// front of the sequence read.  (total count, including any that are
00061     /// already clipped.)
00062     /// \param numBackClips number of bases that should be clipped from the
00063     /// back of the sequence read.  (total count, including any that are
00064     /// already clipped.)
00065     /// \param startPos 0-based start position associated with the
00066     /// cigar prior to updating (input) and set to the 0-based start position
00067     /// after updating (output) the cigar if it was CLIPPED.
00068     /// \param updatedCigar set to the clipped cigar if CLIPPED (output param).
00069     static FilterStatus softClip(Cigar& oldCigar, 
00070                                  int32_t numFrontClips,
00071                                  int32_t numBackClips,
00072                                  int32_t& startPos,
00073                                  CigarRoller& updatedCigar);
00074 
00075     // Filter the read based on the specified quality threshold.
00076     // Returns how the read was affected:
00077     //     NONE if the read was not modified,
00078     //     FILTERED if the read was modified to unmapped because it was over
00079     //              the quality threshold.
00080     static FilterStatus filterOnMismatchQuality(SamRecord& record,
00081                                                 GenomeSequence& refSequence,
00082                                                 uint32_t qualityThreshold, 
00083                                                 uint8_t defaultQualityInt);
00084     // NOTE(review): presumably the summed quality of record's mismatches against refSequence -- confirm in the .cpp.
00085     static uint32_t sumMismatchQuality(SamRecord& record, 
00086                                        GenomeSequence& refSequence,
00087                                        uint8_t defaultQualityInt);
00088 
00089     // Filter the read out (mark it as unmapped).
00090     static void filterRead(SamRecord& record);
00091 };
00092 
00093 #endif
00094 
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3