libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2012-2013 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __TABIX_H__ 00019 #define __TABIX_H__ 00020 00021 #include <stdint.h> 00022 #include <vector> 00023 #include <map> 00024 #include <stdlib.h> 00025 00026 #include "IndexBase.h" 00027 00028 #include "InputFile.h" 00029 #include "StatGenStatus.h" 00030 00031 class Tabix : public IndexBase 00032 { 00033 public: 00034 00035 enum Format 00036 { 00037 FORMAT_GENERIC = 0, 00038 FORMAT_SAM = 1, 00039 FORMAT_VCF = 2 00040 }; 00041 00042 Tabix(); 00043 virtual ~Tabix(); 00044 00045 /// Reset the member data for a new index file. 00046 void resetIndex(); 00047 00048 // Read & parse the specified index file. 00049 /// \param filename the bam index file to be read. 00050 /// \return the status of the read. 00051 StatGenStatus::Status readIndex(const char* filename); 00052 00053 /// Get the starting file offset to look for the specified start position. 00054 /// For an entire reference ID, set start to -1. 00055 /// To start at the beginning of the region, set start to 0/-1. 00056 bool getStartPos(const char* refName, int32_t start, 00057 uint64_t& fileStartPos) const; 00058 00059 /// Return the reference name at the specified index or 00060 /// throws an exception if out of range. 00061 const char* getRefName(unsigned int indexNum) const; 00062 00063 // Get the format of this tabix file. 00064 inline int32_t getFormat() const { return myFormat.format; } 00065 00066 private: 00067 struct TabixFormat 00068 { 00069 int32_t format; 00070 int32_t col_seq; 00071 int32_t col_beg; 00072 int32_t col_end; 00073 int32_t meta; // character that starts header lines 00074 int32_t skip; // Number of lines to skip from putting into the index. 00075 }; 00076 00077 TabixFormat myFormat; 00078 00079 char* myChromNamesBuffer; 00080 00081 // vector pointing to the chromosome names. 00082 std::vector<const char*> myChromNamesVector; 00083 }; 00084 00085 00086 #endif