libStatGen Software  1
BamIndex.h
00001 /*
00002  *  Copyright (C) 2010-2012  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __BAM_INDEX_H__
00019 #define __BAM_INDEX_H__
00020 
00021 #include <stdint.h>
00022 #include <vector>
00023 #include <map>
00024 #include <stdlib.h>
00025 
00026 #include "IndexBase.h"
00027 
00028 #include "InputFile.h"
00029 #include "SamStatus.h"
00030 
00031 class BamIndex : public IndexBase
00032 {
          /// \class BamIndex
          /// \brief BAM index class derived from IndexBase.
          ///
          /// Declares operations to read & parse a bam index file, get the list
          /// of chunks for a region, and query file offsets and read counts per
          /// reference ID.  Only declarations appear in this header; the
          /// definitions are not part of this file.
00033 public:
00034 
00035     BamIndex();
00036     virtual ~BamIndex();
00037 
00038     /// Reset the member data for a new index file.
00039     virtual void resetIndex();
00040 
00041     /// Read & parse the specified index file.
00042     /// \param filename the bam index file to be read.
00043     /// \return the status of the read.
00044     SamStatus::Status readIndex(const char* filename);
00045 
00046     /// Get the list of chunks associated with this region.
00047     /// For an entire reference ID, set start and end to -1.
00048     /// To start at the beginning of the region, set start to 0/-1.
00049     /// To go to the end of the region, set end to -1.
          /// \param refID reference ID of the region.
          /// \param start start of the region (0/-1 for the beginning).
          /// \param end end of the region (-1 to go to the end).
          /// \param chunkList NOTE(review): presumably filled with the chunks
          ///        found for the region - confirm against the implementation.
          /// \return NOTE(review): the meaning of the bool is not visible in this
          ///         header - presumably success/failure; confirm.
00050     bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, 
00051                             SortedChunkList& chunkList);
00052 
          /// NOTE(review): undocumented in the original.  By its name, presumably
          /// the maximum file offset recorded in the index (see maxOverallOffset) -
          /// confirm against the implementation.
00053     uint64_t getMaxOffset() const;
00054 
00055     /// Get the minimum and maximum file offsets for the specified reference ID.
00056     /// \param refID the reference ID to locate in the file.
00057     /// \param minOffset returns the min file offset for the specified reference
00058     /// \param maxOffset returns the max file offset for the specified reference
00059     /// \return whether or not the reference was found in the file
00060     bool getReferenceMinMax(int32_t refID, 
00061                             uint64_t& minOffset, 
00062                             uint64_t& maxOffset) const;
00063 
00064     /// Get the number of mapped reads for this reference id.  Returns -1 for
00065     /// out of range refIDs.
00066     /// \param refID reference ID for which to extract the number of mapped reads.
00067     /// \return number of mapped reads for the specified reference id.
00068     int32_t getNumMappedReads(int32_t refID);
00069 
00070     /// Get the number of unmapped reads for this reference id.  Returns -1 for
00071     /// out of range refIDs.
00072     /// \param refID reference ID for which to extract the number of unmapped reads.
00073     /// \return number of unmapped reads for the specified reference id
00074     int32_t getNumUnMappedReads(int32_t refID);
00075 
00076     /// Print the index information.
00077     /// \param refID reference ID for which to print info for.  -1 means print for all references.
00078     /// \param summary whether or not to just print a summary (defaults to false).  The summary just contains summary info for each reference and not every bin/chunk.
00079     void printIndex(int32_t refID, bool summary = false);
00080 
00081     // Special values used for read counts and reference IDs.
00082     /// The number used for an unknown number of reads.
00083     static const int32_t UNKNOWN_NUM_READS = -1;
00084 
00085     /// The number used for the reference id of unmapped reads.
00086     static const int32_t REF_ID_UNMAPPED = -1;
00087 
00088     /// The number used to indicate that all reference ids should be used.
00089     static const int32_t REF_ID_ALL = -2;
00090 
00091 private:
          // NOTE(review): presumably the value reported by getMaxOffset() -
          // confirm against the implementation.
00092     uint64_t maxOverallOffset;
00093 
          // NOTE(review): presumably the number of reads that have no reference
          // (REF_ID_UNMAPPED) - confirm against the implementation.
00094     int32_t myUnMappedNumReads;
00095 };
00096 
00097 
00098 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends