SamStatistics.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamStatistics.h"
00019 #include <iomanip>
00020 #include "SamFlag.h"
00021 
00022 SamStatistics::SamStatistics()
00023 {
00024     reset();
00025 }
00026 
00027 SamStatistics::~SamStatistics()
00028 {
00029     reset();
00030 }
00031 
00032 void SamStatistics::reset()
00033 {
00034     myReadCount = 0;
00035     myMappedReadCount = 0;
00036     myPairedReadCount = 0;
00037     myProperPairedReadCount = 0;
00038     myBaseCount = 0;
00039     myMappedReadBases = 0;
00040     myDupReadCount = 0;
00041     myQCFailureReadCount = 0;
00042 }
00043 
00044 
00045 bool SamStatistics::updateStatistics(SamRecord& samRecord)
00046 {
00047     // Each record has one read, so update the read count.
00048     ++myReadCount;
00049 
00050     int32_t readLen = samRecord.getReadLength();
00051 
00052     // Get the flag to determine the type or 
00053     // read (mapped, paired, proper paired).
00054     uint16_t flag = samRecord.getFlag();
00055 
00056     // If the read is mapped, update the mapped c
00057     if(SamFlag::isMapped(flag))
00058     {
00059         ++myMappedReadCount;
00060         myMappedReadBases += readLen;
00061     }
00062     if(SamFlag::isPaired(flag))
00063     {
00064         ++myPairedReadCount;
00065         if(SamFlag::isProperPair(flag))
00066         {
00067             ++myProperPairedReadCount;
00068         }
00069     }
00070     if(SamFlag::isDuplicate(flag))
00071     {
00072         ++myDupReadCount;
00073     }
00074     if(SamFlag::isQCFailure(flag))
00075     {
00076         ++myQCFailureReadCount;
00077     }
00078     
00079     // Increment the total number of bases.
00080     myBaseCount += readLen;
00081 
00082     return(true);
00083 }
00084 
00085 
00086 void SamStatistics::print()
00087 {
00088     double DIVIDE_UNITS = 1000000;
00089     std::string units = "(e6)";
00090 
00091     std::cerr << std::fixed << std::setprecision(2);
00092 
00093     // If total reads is less than DIVIDE_UNITS, just show the straight number.
00094     if(myReadCount < DIVIDE_UNITS)
00095     {
00096         DIVIDE_UNITS = 1;
00097         units.clear();
00098     }
00099     
00100     // Read Counts
00101     std::cerr << "TotalReads" << units << "\t"
00102               << myReadCount/DIVIDE_UNITS << std::endl;
00103     std::cerr << "MappedReads" << units << "\t" 
00104               << myMappedReadCount/DIVIDE_UNITS << std::endl;
00105     std::cerr << "PairedReads" << units << "\t" 
00106               << myPairedReadCount/DIVIDE_UNITS << std::endl;
00107     std::cerr << "ProperPair" << units << "\t" 
00108               << myProperPairedReadCount/DIVIDE_UNITS << std::endl;
00109     std::cerr << "DuplicateReads" << units << "\t" 
00110               << myDupReadCount/DIVIDE_UNITS << std::endl;
00111     std::cerr << "QCFailureReads" << units << "\t" 
00112               << myQCFailureReadCount/DIVIDE_UNITS << std::endl;
00113     std::cerr << std::endl;
00114 
00115     // Read Percentages
00116     std::cerr << "MappingRate(%)\t" 
00117               << 100 * myMappedReadCount/(double)myReadCount << std::endl;
00118     std::cerr << "PairedReads(%)\t" 
00119               << 100 * myPairedReadCount/(double)myReadCount << std::endl;
00120     std::cerr << "ProperPair(%)\t" 
00121               << 100 * myProperPairedReadCount/(double)myReadCount << std::endl;
00122     std::cerr << "DupRate(%)\t" 
00123               << 100 * myDupReadCount/(double)myReadCount << std::endl;
00124     std::cerr << "QCFailRate(%)\t" 
00125               << 100 * myQCFailureReadCount/(double)myReadCount << std::endl;
00126     std::cerr << std::endl;
00127 
00128     // Base Counts
00129     std::cerr << "TotalBases" << units << "\t"
00130               << myBaseCount/DIVIDE_UNITS << std::endl;
00131     std::cerr << "BasesInMappedReads" << units << "\t"
00132               << myMappedReadBases/DIVIDE_UNITS << std::endl;
00133 }
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3