libStatGen Software  1
Tabix.h
00001 /*
00002  *  Copyright (C) 2012-2013  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __TABIX_H__
00019 #define __TABIX_H__
00020 
00021 #include <stdint.h>
00022 #include <vector>
00023 #include <map>
00024 #include <stdlib.h>
00025 
00026 #include "IndexBase.h"
00027 
00028 #include "InputFile.h"
00029 #include "StatGenStatus.h"
00030 
00031 class Tabix : public IndexBase
00032 {
      // Tabix: reads and parses an index file (see readIndex) and answers
      // queries about it: the file offset at which to start looking for a
      // position (getStartPos), the reference name for an index number
      // (getRefName), and the format code (getFormat).
      // Derives from IndexBase; anything inherited is declared in IndexBase.h.
00033 public:
00034 
          /// Format codes declared for this index.
          /// NOTE(review): presumably the possible values of
          /// TabixFormat::format / getFormat() -- confirm against Tabix.cpp.
00035     enum Format
00036         { 
00037             FORMAT_GENERIC = 0,
00038             FORMAT_SAM = 1,
00039             FORMAT_VCF = 2
00040         };
00041 
          /// Constructor / virtual destructor; both are defined outside this
          /// header.
00042     Tabix();
00043     virtual ~Tabix();
00044 
00045     /// Reset the member data for a new index file.
00046     void resetIndex();
00047 
00048     /// Read & parse the specified index file.
00049     /// \param filename the index file to be read.
00050     /// \return the status of the read.
00051     StatGenStatus::Status readIndex(const char* filename);
00052 
00053     /// Get the starting file offset to look for the specified start position.
00054     /// For an entire reference ID, set start to -1.
00055     /// To start at the beginning of the region, set start to 0/-1.
          /// \param refName name of the reference to look up.
          /// \param start start position to look for (see above for 0/-1).
          /// \param fileStartPos output: passed by non-const reference so the
          /// method can store the file offset in it.
          /// \return bool result; presumably true when fileStartPos was set
          /// and false otherwise -- TODO confirm against the implementation.
00056     bool getStartPos(const char* refName, int32_t start,
00057                      uint64_t& fileStartPos) const;
00058 
00059     /// Return the reference name at the specified index or
00060     /// throws an exception if out of range.
          /// \param indexNum index of the reference name to return.
00061     const char* getRefName(unsigned int indexNum) const;
00062 
00063     /// Get the format of this tabix file.
          /// \return myFormat.format as a raw int32_t (not as the Format enum).
          /// NOTE(review): presumably one of the Format values -- confirm.
00064     inline int32_t getFormat() const { return myFormat.format; }
00065 
00066 private:
          // Format description stored for the index: six int32_t fields.
          // NOTE(review): the field names mirror the header fields of the
          // tabix index specification, so this is presumably filled straight
          // from the file by readIndex -- keep order and widths unchanged
          // until that is confirmed.
00067     struct TabixFormat
00068     {
00069         int32_t format;  // value returned by getFormat().
00070         int32_t col_seq; // presumably the sequence-name column -- TODO confirm
00071         int32_t col_beg; // presumably the begin-position column -- TODO confirm
00072         int32_t col_end; // presumably the end-position column -- TODO confirm
00073         int32_t meta; // character that starts header lines
00074         int32_t skip; // Number of lines to skip from putting into the index.
00075     };
00076 
          // Format description exposed through getFormat().
00077     TabixFormat myFormat;
00078 
          // Raw character buffer; presumably the backing storage for the names
          // referenced by myChromNamesVector -- TODO confirm.
          // NOTE(review): the class declares a destructor but no copy
          // constructor or copy assignment. If the destructor frees this
          // buffer, copying a Tabix would free it twice -- check Tabix.cpp.
00079     char* myChromNamesBuffer;
00080 
00081     // vector pointing to the chromosome names.
00082     std::vector<const char*> myChromNamesVector;
00083 };
00084 
00085 
00086 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends