libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010-2012 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __BAM_INDEX_H__ 00019 #define __BAM_INDEX_H__ 00020 00021 #include <stdint.h> 00022 #include <vector> 00023 #include <map> 00024 #include <stdlib.h> 00025 00026 #include "IndexBase.h" 00027 00028 #include "InputFile.h" 00029 #include "SamStatus.h" 00030 00031 class BamIndex : public IndexBase 00032 { 00033 public: 00034 00035 BamIndex(); 00036 virtual ~BamIndex(); 00037 00038 /// Reset the member data for a new index file. 00039 virtual void resetIndex(); 00040 00041 // Read & parse the specified index file. 00042 /// \param filename the bam index file to be read. 00043 /// \return the status of the read. 00044 SamStatus::Status readIndex(const char* filename); 00045 00046 /// Get the list of chunks associated with this region. 00047 /// For an entire reference ID, set start and end to -1. 00048 /// To start at the beginning of the region, set start to 0/-1. 00049 /// To go to the end of the region, set end to -1. 00050 bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, 00051 SortedChunkList& chunkList); 00052 00053 uint64_t getMaxOffset() const; 00054 00055 /// Get the minimum and maximum file offsets for the specfied reference ID. 00056 /// \param refID the reference ID to locate in the file. 00057 /// \param minOffset returns the min file offset for the specified reference 00058 /// \param maxOffset returns the max file offset for the specified reference 00059 /// \return whether or not the reference was found in the file 00060 bool getReferenceMinMax(int32_t refID, 00061 uint64_t& minOffset, 00062 uint64_t& maxOffset) const; 00063 00064 /// Get the number of mapped reads for this reference id. Returns -1 for 00065 /// out of range refIDs. 00066 /// \param refID reference ID for which to extract the number of mapped reads. 00067 /// \return number of mapped reads for the specified reference id. 00068 int32_t getNumMappedReads(int32_t refID); 00069 00070 /// Get the number of unmapped reads for this reference id. Returns -1 for 00071 /// out of range refIDs. 00072 /// \param refID reference ID for which to extract the number of unmapped reads. 00073 /// \return number of unmapped reads for the specified reference id 00074 int32_t getNumUnMappedReads(int32_t refID); 00075 00076 /// Print the index information. 00077 /// \param refID reference ID for which to print info for. -1 means print for all references. 00078 /// \param summary whether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk. 00079 void printIndex(int32_t refID, bool summary = false); 00080 00081 // Number of reference sequences. 00082 /// The number used for an unknown number of reads. 00083 static const int32_t UNKNOWN_NUM_READS = -1; 00084 00085 /// The number used for the reference id of unmapped reads. 00086 static const int32_t REF_ID_UNMAPPED = -1; 00087 00088 /// The number used to indicate that all reference ids should be used. 00089 static const int32_t REF_ID_ALL = -2; 00090 00091 private: 00092 uint64_t maxOverallOffset; 00093 00094 int32_t myUnMappedNumReads; 00095 }; 00096 00097 00098 #endif